snowpark-connect 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (474)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +13 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +6 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +26 -8
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +825 -353
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  17. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  18. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  19. snowflake/snowpark_connect/relation/map_join.py +258 -62
  20. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  21. snowflake/snowpark_connect/relation/map_udtf.py +4 -2
  22. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  23. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  24. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  25. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  26. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  27. snowflake/snowpark_connect/relation/write/map_write.py +62 -53
  28. snowflake/snowpark_connect/resources_initializer.py +29 -1
  29. snowflake/snowpark_connect/server.py +18 -3
  30. snowflake/snowpark_connect/type_mapping.py +29 -25
  31. snowflake/snowpark_connect/typed_column.py +14 -0
  32. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  33. snowflake/snowpark_connect/utils/context.py +6 -1
  34. snowflake/snowpark_connect/utils/scala_udf_utils.py +588 -0
  35. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  36. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  37. snowflake/snowpark_connect/utils/udf_utils.py +38 -7
  38. snowflake/snowpark_connect/utils/udtf_utils.py +17 -3
  39. snowflake/snowpark_connect/version.py +1 -1
  40. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/METADATA +1 -1
  41. snowpark_connect-0.25.0.dist-info/RECORD +477 -0
  42. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  46. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  47. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  48. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  49. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  50. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  51. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  52. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  53. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  54. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  55. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  56. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  57. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  93. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  94. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  95. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  96. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  97. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  98. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  99. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  100. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  101. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  102. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  103. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  104. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  105. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  106. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  107. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  108. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  109. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  360. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  361. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  362. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  363. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  364. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  365. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  366. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  367. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  368. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  369. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  370. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  371. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  439. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  440. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  441. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  442. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  443. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  444. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  445. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  446. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  447. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  448. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  449. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  450. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  451. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  452. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  453. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  466. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  467. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-connect +0 -0
  468. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-session +0 -0
  469. {snowpark_connect-0.24.0.data → snowpark_connect-0.25.0.data}/scripts/snowpark-submit +0 -0
  470. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/WHEEL +0 -0
  471. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE-binary +0 -0
  472. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/LICENSE.txt +0 -0
  473. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/licenses/NOTICE-binary +0 -0
  474. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.25.0.dist-info}/top_level.txt +0 -0
@@ -34,6 +34,7 @@ def map_unresolved_star(
34
34
  column_mapping: ColumnNameMap,
35
35
  typer: ExpressionTyper,
36
36
  ) -> tuple[list[str], TypedColumn]:
37
+
37
38
  if exp.unresolved_star.HasField("unparsed_target"):
38
39
  unparsed_target = exp.unresolved_star.unparsed_target
39
40
  name_parts = split_fully_qualified_spark_name(unparsed_target)
@@ -102,7 +103,7 @@ def map_unresolved_star(
102
103
  prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
103
104
  prefix_candidate = (
104
105
  column_mapping.get_snowpark_column_name_from_spark_column_name(
105
- prefix_candidate_str, allow_non_exists=True
106
+ prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
106
107
  )
107
108
  )
108
109
  if prefix_candidate is None:
@@ -180,7 +181,7 @@ def map_unresolved_star_struct(
180
181
  prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
181
182
  prefix_candidate = (
182
183
  column_mapping.get_snowpark_column_name_from_spark_column_name(
183
- prefix_candidate_str, allow_non_exists=True
184
+ prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
184
185
  )
185
186
  )
186
187
  if prefix_candidate is None:
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+
6
+ class HiddenColumn:
7
+ """
8
+ Represents a hidden column in a Snowflake table.
9
+
10
+ Hidden columns are not visible in standard queries but can be accessed
11
+ directly if needed. This class provides a way to reference such columns
12
+ in Snowpark operations
13
+ """
14
+
15
+ def __init__(
16
+ self,
17
+ hidden_snowpark_name: str,
18
+ spark_name: str,
19
+ visible_snowpark_name: str,
20
+ qualifiers: list[str] | None = None,
21
+ original_position: int | None = None,
22
+ ) -> None:
23
+ """
24
+ Initializes a HiddenColumn instance.
25
+
26
+ Args:
27
+ name (str): The name of the hidden column.
28
+ """
29
+
30
+ # The Snowpark internal name for the hidden column
31
+ self.hidden_snowpark_name = hidden_snowpark_name
32
+ # The Spark name for the hidden column
33
+ self.spark_name = spark_name
34
+ # The left side visible Snowpark name for the dropped right side column
35
+ self.visible_snowpark_name = visible_snowpark_name
36
+ # Qualifiers for the hidden column (e.g., table or schema names)
37
+ self.qualifiers = qualifiers if qualifiers is not None else []
38
+ # The position of the hidden column in the original schema
39
+ self.original_position = original_position
@@ -735,10 +735,17 @@ def map_with_columns(
735
735
  """
736
736
  input_container = map_relation(rel.with_columns.input)
737
737
  input_df = input_container.dataframe
738
- with_columns = [
739
- map_alias(alias, input_container.column_map, ExpressionTyper(input_df))
740
- for alias in rel.with_columns.aliases
741
- ]
738
+ with_columns = []
739
+ for alias in rel.with_columns.aliases:
740
+ spark_names, typed_alias = map_alias(
741
+ alias, input_container.column_map, ExpressionTyper(input_df)
742
+ )
743
+ register_lca_alias(spark_names[0], typed_alias)
744
+ with_columns.append((spark_names, typed_alias))
745
+
746
+ # we don't need lateral aliases anymore
747
+ clear_lca_alias_map()
748
+
742
749
  # TODO: This list needs to contain all unique column names, but the code below doesn't
743
750
  # guarantee that.
744
751
  with_columns_names = []
@@ -806,6 +813,12 @@ def map_with_columns(
806
813
  with_columns_names_deduped, with_columns_exprs_deduped
807
814
  ).select(*new_snowpark_columns)
808
815
 
816
+ # SNOW-2306644: the next projection after a withColumn call can completely remove the added column
817
+ # df.withColumn("new").select("foo").filter("new") will fail with a missing column error
818
+ # the column will be preserved if flattening is disabled
819
+ if hasattr(result, "_select_statement"):
820
+ result._select_statement.flatten_disabled = True
821
+
809
822
  snowpark_name_to_type = dict(
810
823
  [(f.name, f.datatype) for f in input_df.schema.fields]
811
824
  + list(zip(with_columns_names, with_columns_types))
@@ -350,15 +350,51 @@ def map_aggregate(
350
350
  if not is_group_by_all:
351
351
  raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
352
352
 
353
- # Set the current grouping columns in context for grouping_id() function
354
- grouping_spark_columns = [spark_name for spark_name, _ in raw_groupings]
355
- set_current_grouping_columns(grouping_spark_columns)
353
+ # Determine grouping columns for context
354
+ # For GROUPING SETS, we need to extract the columns from the sets
355
+ grouping_columns_for_context = []
356
+ if aggregate.group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
357
+ # Use a list to preserve order, avoiding duplicates
358
+ for grouping_set in aggregate.grouping_sets:
359
+ for exp in grouping_set.grouping_set:
360
+ spark_names, _ = map_expression(exp, input_container.column_map, typer)
361
+ # map_expression always returns a list, get the first element
362
+ col_name = spark_names[0]
363
+ if col_name not in grouping_columns_for_context:
364
+ grouping_columns_for_context.append(col_name)
365
+ else:
366
+ grouping_columns_for_context = [spark_name for spark_name, _ in raw_groupings]
367
+
368
+ # Set grouping columns context for processing aggregate expressions
369
+ # This context is needed for resolving grouping__id references
370
+ # TODO: This should properly handle nested queries with GROUP BY using push/pop
371
+ # Currently, nested queries may interfere with parent queries
372
+ set_current_grouping_columns(grouping_columns_for_context)
373
+
374
+ # LCA Support for aggregate expressions: Use the LCA alias map
375
+ # Note: We don't clear the map here to preserve any parent context aliases
376
+ from snowflake.snowpark_connect.utils.context import register_lca_alias
356
377
 
357
378
  agg_count = get_sql_aggregate_function_count()
358
379
  for exp in aggregate.aggregate_expressions:
359
380
  col = _map_column(exp)
360
381
  raw_aggregations.append(col)
361
382
 
383
+ # If this is an alias, register it in the LCA map for subsequent expressions
384
+ if (
385
+ exp.WhichOneof("expr_type") == "alias"
386
+ and exp.alias.name
387
+ and len(exp.alias.name) > 0
388
+ ):
389
+ alias_name = exp.alias.name[0]
390
+ spark_name, snowpark_column = col
391
+
392
+ # Register the alias pointing to the result of its expression
393
+ # This handles both simple aliases (k as lca) and complex ones (lca + 1 as col)
394
+ # The snowpark_column already contains the computed expression with its alias wrapper,
395
+ # which is fine - when referenced later, the column's value is what gets used
396
+ register_lca_alias(alias_name, snowpark_column)
397
+
362
398
  if is_group_by_all:
363
399
  new_agg_count = get_sql_aggregate_function_count()
364
400
  if new_agg_count == agg_count:
@@ -404,15 +440,20 @@ def map_aggregate(
404
440
  case snowflake_proto.Aggregate.GROUP_TYPE_CUBE:
405
441
  result = input_df.cube(groupings)
406
442
  case snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
407
- # TODO: What do we do about groupings?
408
- sets = (
409
- [
410
- map_expression(exp, input_container.column_map, typer)[1].col
411
- for exp in grouping_sets.grouping_set
412
- ]
413
- for grouping_sets in aggregate.grouping_sets
443
+ # Map each grouping set to columns
444
+ sets_mapped = []
445
+ for grouping_set in aggregate.grouping_sets:
446
+ set_cols = []
447
+ for exp in grouping_set.grouping_set:
448
+ _, typed_col = map_expression(
449
+ exp, input_container.column_map, typer
450
+ )
451
+ set_cols.append(typed_col.col)
452
+ sets_mapped.append(set_cols)
453
+
454
+ result = input_df.group_by_grouping_sets(
455
+ snowpark.GroupingSets(*sets_mapped)
414
456
  )
415
- result = input_df.group_by_grouping_sets(snowpark.GroupingSets(*sets))
416
457
  case other:
417
458
  raise SnowparkConnectNotImplementedError(
418
459
  f"Unsupported GROUP BY type: {other}"
@@ -1,13 +1,18 @@
1
1
  #
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
-
4
+ from collections import Counter
5
5
  from functools import reduce
6
6
 
7
7
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto
8
+ from pyspark.errors.exceptions.base import AnalysisException
8
9
 
9
10
  import snowflake.snowpark.functions as snowpark_fn
10
11
  from snowflake import snowpark
12
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
13
+ quote_name_without_upper_casing,
14
+ unquote_if_quoted,
15
+ )
11
16
  from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
12
17
  from snowflake.snowpark_connect.config import global_config
13
18
  from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
@@ -17,6 +22,7 @@ from snowflake.snowpark_connect.expression.map_expression import (
17
22
  map_single_column_expression,
18
23
  )
19
24
  from snowflake.snowpark_connect.expression.typer import JoinExpressionTyper
25
+ from snowflake.snowpark_connect.hidden_column import HiddenColumn
20
26
  from snowflake.snowpark_connect.relation.map_relation import (
21
27
  NATURAL_JOIN_TYPE_BASE,
22
28
  map_relation,
@@ -24,7 +30,6 @@ from snowflake.snowpark_connect.relation.map_relation import (
24
30
  from snowflake.snowpark_connect.utils.context import (
25
31
  push_evaluating_join_condition,
26
32
  push_sql_scope,
27
- set_plan_id_map,
28
33
  set_sql_plan_name,
29
34
  )
30
35
  from snowflake.snowpark_connect.utils.telemetry import (
@@ -33,6 +38,9 @@ from snowflake.snowpark_connect.utils.telemetry import (
33
38
 
34
39
  USING_COLUMN_NOT_FOUND_ERROR = "[UNRESOLVED_USING_COLUMN_FOR_JOIN] USING column `{0}` not found on the {1} side of the join. The {1}-side columns: {2}"
35
40
 
41
+ DUPLICATED_JOIN_COL_LSUFFIX = "_left"
42
+ DUPLICATED_JOIN_COL_RSUFFIX = "_right"
43
+
36
44
 
37
45
  def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
38
46
  left_container: DataFrameContainer = map_relation(rel.join.left)
@@ -74,6 +82,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
74
82
 
75
83
  # This handles case sensitivity for using_columns
76
84
  case_corrected_right_columns: list[str] = []
85
+ hidden_columns = set()
86
+ # Propagate the hidden columns from left/right inputs to the result in case of chained joins
87
+ if left_container.column_map.hidden_columns:
88
+ hidden_columns.update(left_container.column_map.hidden_columns)
89
+
90
+ if right_container.column_map.hidden_columns:
91
+ hidden_columns.update(right_container.column_map.hidden_columns)
77
92
 
78
93
  if rel.join.HasField("join_condition"):
79
94
  assert not using_columns
@@ -105,8 +120,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
105
120
  right=right_input,
106
121
  on=join_expression.col,
107
122
  how=join_type,
108
- lsuffix="_left",
109
- rsuffix="_right",
123
+ lsuffix=DUPLICATED_JOIN_COL_LSUFFIX,
124
+ rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
110
125
  )
111
126
  elif using_columns:
112
127
  if any(
@@ -156,12 +171,24 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
156
171
  )
157
172
  )
158
173
 
174
+ using_columns_snowpark_names = (
175
+ left_container.column_map.get_snowpark_column_names_from_spark_column_names(
176
+ list(using_columns), return_first=True
177
+ )
178
+ )
179
+
180
+ using_columns_snowpark_types = [
181
+ left_container.dataframe.schema.fields[idx].datatype
182
+ for idx, col in enumerate(left_container.column_map.get_snowpark_columns())
183
+ if col in using_columns_snowpark_names
184
+ ]
185
+
159
186
  # Round trip the using columns through the column map to get the correct names
160
187
  # in order to support case sensitivity.
161
188
  # TODO: case_corrected_left_columns / case_corrected_right_columns may no longer be required as Snowpark dataframe preserves the column casing now.
162
- case_corrected_left_columns = left_container.column_map.get_spark_column_names_from_snowpark_column_names(
163
- left_container.column_map.get_snowpark_column_names_from_spark_column_names(
164
- list(using_columns), return_first=True
189
+ case_corrected_left_columns = (
190
+ left_container.column_map.get_spark_column_names_from_snowpark_column_names(
191
+ using_columns_snowpark_names
165
192
  )
166
193
  )
167
194
  case_corrected_right_columns = right_container.column_map.get_spark_column_names_from_snowpark_column_names(
@@ -195,28 +222,141 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
195
222
  (left == right for left, right in snowpark_using_columns),
196
223
  ),
197
224
  how=join_type,
225
+ rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
198
226
  )
227
+ # If we disambiguated the snowpark_using_columns during the join, we need to update 'snowpark_using_columns' to
228
+ # use the disambiguated names.
229
+ disambiguated_snowpark_using_columns = []
230
+
231
+ # Ignore disambiguation for LEFT SEMI JOIN and LEFT ANTI JOIN because they drop the right columns, so it'll never disambiguate.
232
+ if join_type in ["leftsemi", "leftanti"]:
233
+ disambiguated_snowpark_using_columns = snowpark_using_columns
234
+ else:
235
+ normalized_joined_columns = [
236
+ unquote_if_quoted(col) for col in joined_df.columns
237
+ ]
238
+ # snowpark_using_columns is a list of tuples of snowpark columns, joined_df.columns is a list of strings of column names
239
+ for (left, right) in snowpark_using_columns:
240
+ normalized_left_name = unquote_if_quoted(left.getName())
241
+ normalized_right_name = unquote_if_quoted(right.getName())
242
+
243
+ # are both left and right in joined_df? if not, it's been disambiguated
244
+ if (
245
+ normalized_left_name in normalized_joined_columns
246
+ and normalized_right_name in normalized_joined_columns
247
+ ):
248
+ # we want to just add this
249
+ disambiguated_snowpark_using_columns.append((left, right))
250
+ else:
251
+ # we need to figure out the disambiguated names and add those - it only disambiguates if left == right
252
+ disambiguated_left: snowpark.Column | None = None
253
+ disambiguated_right: snowpark.Column | None = None
254
+
255
+ for col in normalized_joined_columns:
256
+ quoted_col = f'"{col}"'
257
+ # get the column name and cross check it to see if it ends with the og name
258
+ if col.endswith(normalized_left_name) and col.startswith("l_"):
259
+ disambiguated_left = joined_df[quoted_col]
260
+ elif col.endswith(normalized_right_name) and col.startswith(
261
+ "r_"
262
+ ):
263
+ disambiguated_right = joined_df[quoted_col]
264
+
265
+ # If we have both disambiguated columns, we can break out of the loop to save processing time
266
+ if (
267
+ disambiguated_left is not None
268
+ and disambiguated_right is not None
269
+ ):
270
+ break
271
+ if disambiguated_left is None or disambiguated_right is None:
272
+ raise AnalysisException(
273
+ f"Disambiguated columns not found for {normalized_left_name} and {normalized_right_name}."
274
+ )
275
+ disambiguated_snowpark_using_columns.append(
276
+ (disambiguated_left, disambiguated_right)
277
+ )
278
+
199
279
  # For outer joins, we need to preserve join keys from both sides using COALESCE
280
+ """
281
+ CHANGES:
282
+ - IF CASE
283
+ - Need to drop the using columns
284
+ - Need to create the hidden_columns DF with the using columns from right and left
285
+ - ELSE CASE
286
+ - Need to drop the right side using columns
287
+ - Need to create the hidden_columns DF with the using columns from right
288
+ """
200
289
  if join_type == "full_outer":
201
290
  coalesced_columns = []
202
- columns_to_drop = []
203
- for i, (left_col, right_col) in enumerate(snowpark_using_columns):
291
+ for i, (left_col, _right_col) in enumerate(snowpark_using_columns):
204
292
  # Use the original user-specified column name to preserve case sensitivity
205
- original_column_name = rel.join.using_columns[i]
206
- coalesced_col = snowpark_fn.coalesce(left_col, right_col).alias(
207
- original_column_name
208
- )
293
+ # Use the disambiguated columns for coalescing
294
+ disambiguated_left_col = disambiguated_snowpark_using_columns[i][0]
295
+ disambiguated_right_col = disambiguated_snowpark_using_columns[i][1]
296
+
297
+ coalesced_col = snowpark_fn.coalesce(
298
+ disambiguated_left_col, disambiguated_right_col
299
+ ).alias(left_col.get_name())
209
300
  coalesced_columns.append(coalesced_col)
210
- columns_to_drop.extend([left_col, right_col])
211
301
 
302
+ # Create HiddenColumn objects for each hidden column
303
+ hidden_left = HiddenColumn(
304
+ hidden_snowpark_name=disambiguated_left_col.getName(),
305
+ spark_name=case_corrected_left_columns[i],
306
+ visible_snowpark_name=left_col.get_name(),
307
+ qualifiers=left_container.column_map.get_qualifier_for_spark_column(
308
+ case_corrected_left_columns[i]
309
+ ),
310
+ original_position=left_container.column_map.get_spark_columns().index(
311
+ case_corrected_left_columns[i]
312
+ ),
313
+ )
314
+
315
+ hidden_right = HiddenColumn(
316
+ hidden_snowpark_name=disambiguated_right_col.getName(),
317
+ spark_name=case_corrected_right_columns[i],
318
+ visible_snowpark_name=left_col.get_name(),
319
+ qualifiers=right_container.column_map.get_qualifier_for_spark_column(
320
+ case_corrected_right_columns[i]
321
+ ),
322
+ original_position=right_container.column_map.get_spark_columns().index(
323
+ case_corrected_right_columns[i]
324
+ ),
325
+ )
326
+ hidden_columns.update(
327
+ [
328
+ hidden_left,
329
+ hidden_right,
330
+ ]
331
+ )
332
+
333
+ # All non-hidden columns (not including the coalesced columns)
212
334
  other_columns = [
213
335
  snowpark_fn.col(col_name)
214
336
  for col_name in joined_df.columns
215
- if col_name not in [col.getName() for col in columns_to_drop]
337
+ if col_name not in [col.hidden_snowpark_name for col in hidden_columns]
216
338
  ]
217
339
  result = joined_df.select(coalesced_columns + other_columns)
340
+
218
341
  else:
219
342
  result = joined_df.drop(*(right for _, right in snowpark_using_columns))
343
+ # We never run into the disambiguation case unless it's a full outer join.
344
+ for i, (left_col, right_col) in enumerate(
345
+ disambiguated_snowpark_using_columns
346
+ ):
347
+ # Only right side columns are hidden
348
+ hidden_col = HiddenColumn(
349
+ hidden_snowpark_name=right_col.getName(),
350
+ spark_name=case_corrected_right_columns[i],
351
+ visible_snowpark_name=left_col.getName(),
352
+ qualifiers=right_container.column_map.get_qualifier_for_spark_column(
353
+ case_corrected_right_columns[i]
354
+ ),
355
+ original_position=right_container.column_map.get_spark_columns().index(
356
+ case_corrected_right_columns[i]
357
+ ),
358
+ )
359
+ hidden_columns.add(hidden_col)
220
360
  else:
221
361
  if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
222
362
  raise SparkException.implicit_cartesian_product("inner")
@@ -230,35 +370,110 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
230
370
  # - LEFT SEMI JOIN: Returns left rows that have matches in right table (no right columns)
231
371
  # - LEFT ANTI JOIN: Returns left rows that have NO matches in right table (no right columns)
232
372
  # Both preserve only the columns from the left DataFrame without adding any columns from the right.
233
- spark_cols_after_join: list[str] = left_container.column_map.get_spark_columns()
373
+ spark_cols_after_join = left_container.column_map.get_spark_columns()
374
+ snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
375
+ snowpark_col_types = [
376
+ f.datatype for f in left_container.dataframe.schema.fields
377
+ ]
234
378
  qualifiers = left_container.column_map.get_qualifiers()
379
+ elif join_type == "full_outer" and using_columns:
380
+ # We want the coalesced columns to be first, followed by all the left and right columns (excluding using columns)
381
+ spark_cols_after_join: list[str] = []
382
+ snowpark_cols_after_join: list[str] = []
383
+ snowpark_col_types: list[str] = []
384
+
385
+ left_container_snowpark_columns = (
386
+ left_container.column_map.get_snowpark_columns()
387
+ )
388
+ right_container_snowpark_columns = (
389
+ right_container.column_map.get_snowpark_columns()
390
+ )
391
+
392
+ qualifiers = []
393
+ for i in range(len(case_corrected_left_columns)):
394
+ spark_cols_after_join.append(case_corrected_left_columns[i])
395
+ snowpark_cols_after_join.append(using_columns_snowpark_names[i])
396
+ snowpark_col_types.append(using_columns_snowpark_types[i])
397
+ qualifiers.append([])
398
+
399
+ # Handle adding left and right columns, excluding the using columns
400
+ for i, spark_col in enumerate(left_container.column_map.get_spark_columns()):
401
+ if (
402
+ spark_col not in case_corrected_left_columns
403
+ or spark_col in left_container.column_map.get_spark_columns()[:i]
404
+ ):
405
+ spark_cols_after_join.append(spark_col)
406
+ snowpark_cols_after_join.append(left_container_snowpark_columns[i])
407
+ qualifiers.append(
408
+ left_container.column_map.get_qualifier_for_spark_column(spark_col)
409
+ )
410
+
411
+ snowpark_col_types.append(
412
+ left_container.dataframe.schema.fields[i].datatype
413
+ )
414
+
415
+ for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
416
+ if (
417
+ spark_col not in case_corrected_right_columns
418
+ or spark_col in right_container.column_map.get_spark_columns()[:i]
419
+ ):
420
+ spark_cols_after_join.append(spark_col)
421
+ snowpark_cols_after_join.append(right_container_snowpark_columns[i])
422
+ qualifiers.append(
423
+ right_container.column_map.get_qualifier_for_spark_column(spark_col)
424
+ )
425
+
426
+ snowpark_col_types.append(
427
+ right_container.dataframe.schema.fields[i].datatype
428
+ )
429
+
235
430
  else:
236
- # Add Spark columns and plan_ids from left DF
237
- spark_cols_after_join: list[str] = list(
238
- left_container.column_map.get_spark_columns()
239
- ) + [
240
- spark_col
241
- for i, spark_col in enumerate(
242
- right_container.column_map.get_spark_columns()
243
- )
244
- if spark_col not in case_corrected_right_columns
245
- or spark_col
246
- in right_container.column_map.get_spark_columns()[
247
- :i
248
- ] # this is to make sure we only remove the column once
431
+ spark_cols_after_join = left_container.column_map.get_spark_columns()
432
+ snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
433
+ snowpark_col_types = [
434
+ f.datatype for f in left_container.dataframe.schema.fields
249
435
  ]
250
436
 
251
- qualifiers = list(left_container.column_map.get_qualifiers()) + [
252
- right_container.column_map.get_qualifier_for_spark_column(spark_col)
253
- for i, spark_col in enumerate(
254
- right_container.column_map.get_spark_columns()
437
+ qualifiers = left_container.column_map.get_qualifiers()
438
+
439
+ right_df_snowpark_columns = right_container.column_map.get_snowpark_columns()
440
+
441
+ for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
442
+ if (
443
+ spark_col not in case_corrected_right_columns
444
+ or spark_col in right_container.column_map.get_spark_columns()[:i]
445
+ ):
446
+ spark_cols_after_join.append(spark_col)
447
+ snowpark_cols_after_join.append(right_df_snowpark_columns[i])
448
+ snowpark_col_types.append(
449
+ right_container.dataframe.schema.fields[i].datatype
450
+ )
451
+
452
+ qualifiers.append(
453
+ right_container.column_map.get_qualifier_for_spark_column(spark_col)
454
+ )
455
+
456
+ snowpark_cols_after_join_deduplicated = []
457
+ snowpark_cols_after_join_counter = Counter(snowpark_cols_after_join)
458
+ seen_duplicated_columns = set()
459
+
460
+ for col in snowpark_cols_after_join:
461
+ if snowpark_cols_after_join_counter[col] == 2:
462
+ # This means that the same column exists twice in the joined df, likely due to a self-join and
463
+ # we need to lsuffix and rsuffix to the names of both columns, similar to what Snowpark did under the hood.
464
+
465
+ suffix = (
466
+ DUPLICATED_JOIN_COL_RSUFFIX
467
+ if col in seen_duplicated_columns
468
+ else DUPLICATED_JOIN_COL_LSUFFIX
255
469
  )
256
- if spark_col not in case_corrected_right_columns
257
- or spark_col
258
- in right_container.column_map.get_spark_columns()[
259
- :i
260
- ] # this is to make sure we only remove the column once]
261
- ]
470
+ unquoted_col = unquote_if_quoted(col)
471
+ quoted = quote_name_without_upper_casing(unquoted_col + suffix)
472
+ snowpark_cols_after_join_deduplicated.append(quoted)
473
+
474
+ seen_duplicated_columns.add(col)
475
+ else:
476
+ snowpark_cols_after_join_deduplicated.append(col)
262
477
 
263
478
  column_metadata = {}
264
479
  if left_container.column_map.column_metadata:
@@ -287,33 +502,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
287
502
  result_container = DataFrameContainer.create_with_column_mapping(
288
503
  dataframe=result,
289
504
  spark_column_names=spark_cols_after_join,
290
- snowpark_column_names=result.columns,
505
+ snowpark_column_names=snowpark_cols_after_join_deduplicated,
291
506
  column_metadata=column_metadata,
292
507
  column_qualifiers=qualifiers,
508
+ hidden_columns=hidden_columns,
509
+ snowpark_column_types=snowpark_col_types,
293
510
  )
294
511
 
295
- # Fix for USING join column references with different plan IDs
296
- # After a USING join, references to the right dataframe's columns should resolve
297
- # to the result dataframe that contains the merged columns
298
- if (
299
- using_columns
300
- and rel.join.right.HasField("common")
301
- and rel.join.right.common.HasField("plan_id")
302
- ):
303
- right_plan_id = rel.join.right.common.plan_id
304
- set_plan_id_map(right_plan_id, result_container)
305
-
306
- # For FULL OUTER joins, we also need to map the left dataframe's plan_id
307
- # since both columns are replaced with a coalesced column
308
- if (
309
- using_columns
310
- and join_type == "full_outer"
311
- and rel.join.left.HasField("common")
312
- and rel.join.left.common.HasField("plan_id")
313
- ):
314
- left_plan_id = rel.join.left.common.plan_id
315
- set_plan_id_map(left_plan_id, result_container)
316
-
317
512
  if rel.join.using_columns:
318
513
  # When join 'using_columns', the 'join columns' should go first in result DF.
319
514
  idxs_to_shift = [
@@ -345,6 +540,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
345
540
  cached_schema_getter=lambda: snowpark.types.StructType(
346
541
  reorder(original_df.schema.fields)
347
542
  ),
543
+ hidden_columns=hidden_columns,
348
544
  )
349
545
 
350
546
  return result_container