snowpark-connect 0.24.0__py3-none-any.whl → 0.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (484)
  1. snowflake/snowpark_connect/column_name_handler.py +116 -4
  2. snowflake/snowpark_connect/config.py +23 -0
  3. snowflake/snowpark_connect/constants.py +0 -29
  4. snowflake/snowpark_connect/dataframe_container.py +22 -0
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +56 -1
  6. snowflake/snowpark_connect/expression/literal.py +13 -2
  7. snowflake/snowpark_connect/expression/map_cast.py +5 -8
  8. snowflake/snowpark_connect/expression/map_sql_expression.py +23 -1
  9. snowflake/snowpark_connect/expression/map_udf.py +88 -29
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +199 -15
  11. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +44 -16
  12. snowflake/snowpark_connect/expression/map_unresolved_function.py +840 -367
  13. snowflake/snowpark_connect/expression/map_unresolved_star.py +3 -2
  14. snowflake/snowpark_connect/hidden_column.py +39 -0
  15. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  16. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  17. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  18. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  19. snowflake/snowpark_connect/includes/jars/{hadoop-client-api-3.3.4.jar → spark-connect-client-jvm_2.12-3.5.6.jar} +0 -0
  20. snowflake/snowpark_connect/relation/map_column_ops.py +17 -4
  21. snowflake/snowpark_connect/relation/map_extension.py +52 -11
  22. snowflake/snowpark_connect/relation/map_join.py +258 -62
  23. snowflake/snowpark_connect/relation/map_map_partitions.py +9 -4
  24. snowflake/snowpark_connect/relation/map_relation.py +12 -1
  25. snowflake/snowpark_connect/relation/map_row_ops.py +8 -1
  26. snowflake/snowpark_connect/relation/map_sql.py +88 -11
  27. snowflake/snowpark_connect/relation/map_udtf.py +100 -46
  28. snowflake/snowpark_connect/relation/read/map_read.py +3 -3
  29. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +1 -1
  30. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -1
  31. snowflake/snowpark_connect/relation/read/map_read_table.py +1 -9
  32. snowflake/snowpark_connect/relation/read/reader_config.py +3 -1
  33. snowflake/snowpark_connect/relation/utils.py +44 -0
  34. snowflake/snowpark_connect/relation/write/map_write.py +175 -75
  35. snowflake/snowpark_connect/resources_initializer.py +47 -6
  36. snowflake/snowpark_connect/server.py +26 -4
  37. snowflake/snowpark_connect/type_mapping.py +29 -25
  38. snowflake/snowpark_connect/typed_column.py +14 -0
  39. snowflake/snowpark_connect/utils/artifacts.py +23 -0
  40. snowflake/snowpark_connect/utils/concurrent.py +4 -0
  41. snowflake/snowpark_connect/utils/context.py +6 -1
  42. snowflake/snowpark_connect/utils/external_udxf_cache.py +36 -0
  43. snowflake/snowpark_connect/utils/scala_udf_utils.py +596 -0
  44. snowflake/snowpark_connect/utils/session.py +4 -0
  45. snowflake/snowpark_connect/utils/telemetry.py +6 -17
  46. snowflake/snowpark_connect/utils/udf_helper.py +2 -0
  47. snowflake/snowpark_connect/utils/udf_utils.py +22 -1
  48. snowflake/snowpark_connect/utils/udtf_utils.py +1 -0
  49. snowflake/snowpark_connect/version.py +1 -1
  50. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/METADATA +1 -1
  51. snowpark_connect-0.26.0.dist-info/RECORD +481 -0
  52. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  56. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +0 -16
  57. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +0 -60
  58. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +0 -306
  59. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +0 -16
  60. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +0 -53
  61. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +0 -50
  62. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +0 -43
  63. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +0 -114
  64. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +0 -47
  65. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +0 -43
  66. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +0 -46
  67. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +0 -238
  68. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +0 -194
  69. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +0 -156
  70. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +0 -184
  71. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +0 -78
  72. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +0 -292
  73. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +0 -50
  74. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +0 -152
  75. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +0 -456
  76. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +0 -96
  77. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +0 -186
  78. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +0 -77
  79. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +0 -401
  80. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +0 -528
  81. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +0 -82
  82. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +0 -409
  83. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +0 -55
  84. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +0 -441
  85. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +0 -546
  86. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +0 -71
  87. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +0 -52
  88. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +0 -494
  89. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +0 -85
  90. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +0 -138
  91. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +0 -16
  92. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +0 -151
  93. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +0 -97
  94. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +0 -143
  95. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +0 -551
  96. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +0 -137
  97. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +0 -96
  98. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +0 -142
  99. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +0 -16
  100. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +0 -137
  101. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +0 -561
  102. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +0 -172
  103. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +0 -16
  104. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +0 -353
  105. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +0 -192
  106. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +0 -680
  107. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +0 -206
  108. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +0 -471
  109. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +0 -108
  110. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  111. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  112. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  113. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  114. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +0 -16
  115. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +0 -16
  116. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +0 -177
  117. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +0 -575
  118. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +0 -235
  119. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +0 -653
  120. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +0 -463
  121. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +0 -86
  122. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +0 -151
  123. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +0 -139
  124. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +0 -458
  125. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +0 -86
  126. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +0 -202
  127. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +0 -520
  128. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +0 -361
  129. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +0 -16
  130. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +0 -16
  131. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +0 -40
  132. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +0 -42
  133. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +0 -40
  134. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +0 -37
  135. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +0 -60
  136. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +0 -40
  137. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +0 -40
  138. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +0 -90
  139. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +0 -40
  140. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +0 -40
  141. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +0 -40
  142. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +0 -42
  143. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +0 -37
  144. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +0 -16
  145. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +0 -36
  146. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +0 -42
  147. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +0 -47
  148. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +0 -55
  149. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +0 -40
  150. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +0 -47
  151. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +0 -47
  152. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +0 -42
  153. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +0 -43
  154. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +0 -47
  155. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +0 -43
  156. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +0 -47
  157. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +0 -47
  158. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +0 -40
  159. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +0 -226
  160. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +0 -16
  161. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +0 -39
  162. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +0 -55
  163. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +0 -39
  164. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +0 -39
  165. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +0 -39
  166. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +0 -39
  167. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +0 -39
  168. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +0 -43
  169. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +0 -43
  170. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +0 -16
  171. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +0 -40
  172. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +0 -39
  173. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +0 -42
  174. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +0 -42
  175. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +0 -37
  176. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +0 -40
  177. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +0 -42
  178. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +0 -48
  179. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +0 -40
  180. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +0 -16
  181. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +0 -40
  182. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +0 -41
  183. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +0 -67
  184. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +0 -40
  185. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +0 -55
  186. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +0 -40
  187. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +0 -38
  188. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +0 -55
  189. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +0 -39
  190. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +0 -38
  191. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +0 -16
  192. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +0 -40
  193. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +0 -50
  194. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +0 -73
  195. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +0 -39
  196. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +0 -40
  197. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +0 -40
  198. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +0 -40
  199. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +0 -48
  200. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +0 -39
  201. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +0 -16
  202. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +0 -40
  203. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +0 -16
  204. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +0 -45
  205. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +0 -45
  206. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +0 -49
  207. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +0 -37
  208. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +0 -53
  209. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +0 -45
  210. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +0 -16
  211. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +0 -38
  212. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +0 -37
  213. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +0 -37
  214. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +0 -38
  215. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +0 -37
  216. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +0 -40
  217. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +0 -40
  218. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +0 -38
  219. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +0 -40
  220. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +0 -37
  221. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +0 -38
  222. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +0 -38
  223. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +0 -66
  224. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +0 -37
  225. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +0 -37
  226. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +0 -42
  227. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +0 -39
  228. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +0 -49
  229. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +0 -37
  230. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +0 -39
  231. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +0 -49
  232. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +0 -53
  233. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +0 -43
  234. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +0 -49
  235. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +0 -39
  236. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +0 -41
  237. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +0 -39
  238. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +0 -60
  239. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +0 -48
  240. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +0 -39
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +0 -44
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +0 -84
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +0 -37
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +0 -45
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +0 -39
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +0 -39
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +0 -37
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +0 -39
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +0 -39
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +0 -39
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +0 -39
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +0 -43
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +0 -37
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +0 -36
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +0 -37
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +0 -39
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +0 -16
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +0 -107
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +0 -224
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +0 -825
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +0 -562
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +0 -368
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +0 -257
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +0 -260
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +0 -178
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +0 -184
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +0 -497
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +0 -140
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +0 -354
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +0 -219
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +0 -192
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +0 -228
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +0 -16
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +0 -118
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +0 -198
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +0 -181
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +0 -103
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +0 -141
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +0 -109
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +0 -136
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +0 -125
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +0 -217
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +0 -16
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +0 -384
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +0 -598
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +0 -73
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +0 -869
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +0 -487
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +0 -309
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +0 -156
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +0 -149
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +0 -163
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +0 -16
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +0 -311
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +0 -524
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +0 -419
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +0 -144
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +0 -979
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +0 -234
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +0 -206
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +0 -421
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +0 -187
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +0 -397
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +0 -16
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +0 -100
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +0 -2743
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +0 -484
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +0 -276
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +0 -432
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +0 -310
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +0 -257
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +0 -160
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +0 -128
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +0 -16
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +0 -137
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +0 -16
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +0 -170
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +0 -547
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +0 -285
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +0 -106
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +0 -409
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +0 -247
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +0 -16
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +0 -105
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +0 -197
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +0 -137
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +0 -227
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +0 -634
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +0 -88
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +0 -139
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +0 -475
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +0 -265
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +0 -818
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +0 -162
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +0 -780
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +0 -741
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +0 -160
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +0 -453
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +0 -281
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +0 -487
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +0 -109
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +0 -434
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +0 -253
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +0 -152
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +0 -162
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +0 -234
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +0 -1339
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +0 -82
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +0 -124
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +0 -638
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +0 -200
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +0 -1355
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +0 -655
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +0 -113
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +0 -118
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +0 -192
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +0 -346
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +0 -495
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +0 -263
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +0 -59
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +0 -85
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +0 -364
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +0 -362
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +0 -46
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +0 -123
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +0 -581
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +0 -447
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +0 -301
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +0 -465
  370. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +0 -16
  371. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +0 -83
  372. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +0 -16
  373. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +0 -16
  374. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +0 -16
  375. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +0 -420
  376. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +0 -358
  377. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +0 -16
  378. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +0 -36
  379. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +0 -44
  380. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +0 -116
  381. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +0 -35
  382. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +0 -3612
  383. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +0 -1042
  384. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +0 -2381
  385. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +0 -1060
  386. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +0 -163
  387. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +0 -38
  388. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +0 -48
  389. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +0 -36
  390. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +0 -55
  391. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +0 -36
  392. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +0 -96
  393. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +0 -44
  394. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +0 -36
  395. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +0 -59
  396. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +0 -36
  397. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +0 -59
  398. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +0 -74
  399. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +0 -62
  400. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +0 -58
  401. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +0 -70
  402. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +0 -50
  403. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +0 -68
  404. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +0 -40
  405. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +0 -46
  406. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +0 -44
  407. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +0 -100
  408. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +0 -100
  409. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +0 -163
  410. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +0 -181
  411. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +0 -42
  412. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +0 -16
  413. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +0 -623
  414. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +0 -869
  415. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +0 -342
  416. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +0 -436
  417. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +0 -363
  418. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +0 -592
  419. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +0 -1503
  420. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +0 -392
  421. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +0 -375
  422. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +0 -411
  423. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +0 -16
  424. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +0 -401
  425. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +0 -295
  426. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +0 -106
  427. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +0 -558
  428. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +0 -1346
  429. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +0 -182
  430. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +0 -202
  431. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +0 -503
  432. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +0 -225
  433. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +0 -83
  434. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +0 -201
  435. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +0 -1931
  436. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +0 -256
  437. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +0 -69
  438. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +0 -1349
  439. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +0 -53
  440. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +0 -68
  441. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +0 -283
  442. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +0 -155
  443. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +0 -412
  444. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +0 -1581
  445. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +0 -961
  446. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +0 -165
  447. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +0 -1456
  448. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +0 -1686
  449. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +0 -16
  450. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +0 -184
  451. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +0 -706
  452. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +0 -118
  453. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +0 -160
  454. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +0 -16
  455. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +0 -306
  456. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +0 -196
  457. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +0 -44
  458. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +0 -346
  459. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +0 -89
  460. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +0 -124
  461. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +0 -69
  462. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +0 -167
  463. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +0 -194
  464. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +0 -168
  465. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +0 -939
  466. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +0 -52
  467. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +0 -66
  468. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +0 -368
  469. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +0 -257
  470. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +0 -267
  471. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +0 -153
  472. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +0 -130
  473. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +0 -350
  474. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +0 -97
  475. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +0 -271
  476. snowpark_connect-0.24.0.dist-info/RECORD +0 -898
  477. {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-connect +0 -0
  478. {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-session +0 -0
  479. {snowpark_connect-0.24.0.data → snowpark_connect-0.26.0.data}/scripts/snowpark-submit +0 -0
  480. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/WHEEL +0 -0
  481. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE-binary +0 -0
  482. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/LICENSE.txt +0 -0
  483. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/licenses/NOTICE-binary +0 -0
  484. {snowpark_connect-0.24.0.dist-info → snowpark_connect-0.26.0.dist-info}/top_level.txt +0 -0
@@ -34,6 +34,7 @@ def map_unresolved_star(
34
34
  column_mapping: ColumnNameMap,
35
35
  typer: ExpressionTyper,
36
36
  ) -> tuple[list[str], TypedColumn]:
37
+
37
38
  if exp.unresolved_star.HasField("unparsed_target"):
38
39
  unparsed_target = exp.unresolved_star.unparsed_target
39
40
  name_parts = split_fully_qualified_spark_name(unparsed_target)
@@ -102,7 +103,7 @@ def map_unresolved_star(
102
103
  prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
103
104
  prefix_candidate = (
104
105
  column_mapping.get_snowpark_column_name_from_spark_column_name(
105
- prefix_candidate_str, allow_non_exists=True
106
+ prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
106
107
  )
107
108
  )
108
109
  if prefix_candidate is None:
@@ -180,7 +181,7 @@ def map_unresolved_star_struct(
180
181
  prefix_candidate_str = f"{prefix_candidate_str}.{name_parts[i]}"
181
182
  prefix_candidate = (
182
183
  column_mapping.get_snowpark_column_name_from_spark_column_name(
183
- prefix_candidate_str, allow_non_exists=True
184
+ prefix_candidate_str, allow_non_exists=True, is_qualified=(i > 0)
184
185
  )
185
186
  )
186
187
  if prefix_candidate is None:
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+
6
+ class HiddenColumn:
7
+ """
8
+ Represents a hidden column in a Snowflake table.
9
+
10
+ Hidden columns are not visible in standard queries but can be accessed
11
+ directly if needed. This class provides a way to reference such columns
12
+ in Snowpark operations
13
+ """
14
+
15
+ def __init__(
16
+ self,
17
+ hidden_snowpark_name: str,
18
+ spark_name: str,
19
+ visible_snowpark_name: str,
20
+ qualifiers: list[str] | None = None,
21
+ original_position: int | None = None,
22
+ ) -> None:
23
+ """
24
+ Initializes a HiddenColumn instance.
25
+
26
+ Args:
27
+ name (str): The name of the hidden column.
28
+ """
29
+
30
+ # The Snowpark internal name for the hidden column
31
+ self.hidden_snowpark_name = hidden_snowpark_name
32
+ # The Spark name for the hidden column
33
+ self.spark_name = spark_name
34
+ # The left side visible Snowpark name for the dropped right side column
35
+ self.visible_snowpark_name = visible_snowpark_name
36
+ # Qualifiers for the hidden column (e.g., table or schema names)
37
+ self.qualifiers = qualifiers if qualifiers is not None else []
38
+ # The position of the hidden column in the original schema
39
+ self.original_position = original_position
@@ -735,10 +735,17 @@ def map_with_columns(
735
735
  """
736
736
  input_container = map_relation(rel.with_columns.input)
737
737
  input_df = input_container.dataframe
738
- with_columns = [
739
- map_alias(alias, input_container.column_map, ExpressionTyper(input_df))
740
- for alias in rel.with_columns.aliases
741
- ]
738
+ with_columns = []
739
+ for alias in rel.with_columns.aliases:
740
+ spark_names, typed_alias = map_alias(
741
+ alias, input_container.column_map, ExpressionTyper(input_df)
742
+ )
743
+ register_lca_alias(spark_names[0], typed_alias)
744
+ with_columns.append((spark_names, typed_alias))
745
+
746
+ # we don't need lateral aliases anymore
747
+ clear_lca_alias_map()
748
+
742
749
  # TODO: This list needs to contain all unique column names, but the code below doesn't
743
750
  # guarantee that.
744
751
  with_columns_names = []
@@ -806,6 +813,12 @@ def map_with_columns(
806
813
  with_columns_names_deduped, with_columns_exprs_deduped
807
814
  ).select(*new_snowpark_columns)
808
815
 
816
+ # SNOW-2306644: the next projection after a withColumn call can completely remove the added column
817
+ # df.withColumn("new").select("foo").filter("new") will fail with a missing column error
818
+ # the column will be preserved if flattening is disabled
819
+ if hasattr(result, "_select_statement"):
820
+ result._select_statement.flatten_disabled = True
821
+
809
822
  snowpark_name_to_type = dict(
810
823
  [(f.name, f.datatype) for f in input_df.schema.fields]
811
824
  + list(zip(with_columns_names, with_columns_types))
@@ -350,15 +350,51 @@ def map_aggregate(
350
350
  if not is_group_by_all:
351
351
  raw_groupings = [_map_column(exp) for exp in aggregate.grouping_expressions]
352
352
 
353
- # Set the current grouping columns in context for grouping_id() function
354
- grouping_spark_columns = [spark_name for spark_name, _ in raw_groupings]
355
- set_current_grouping_columns(grouping_spark_columns)
353
+ # Determine grouping columns for context
354
+ # For GROUPING SETS, we need to extract the columns from the sets
355
+ grouping_columns_for_context = []
356
+ if aggregate.group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
357
+ # Use a list to preserve order, avoiding duplicates
358
+ for grouping_set in aggregate.grouping_sets:
359
+ for exp in grouping_set.grouping_set:
360
+ spark_names, _ = map_expression(exp, input_container.column_map, typer)
361
+ # map_expression always returns a list, get the first element
362
+ col_name = spark_names[0]
363
+ if col_name not in grouping_columns_for_context:
364
+ grouping_columns_for_context.append(col_name)
365
+ else:
366
+ grouping_columns_for_context = [spark_name for spark_name, _ in raw_groupings]
367
+
368
+ # Set grouping columns context for processing aggregate expressions
369
+ # This context is needed for resolving grouping__id references
370
+ # TODO: This should properly handle nested queries with GROUP BY using push/pop
371
+ # Currently, nested queries may interfere with parent queries
372
+ set_current_grouping_columns(grouping_columns_for_context)
373
+
374
+ # LCA Support for aggregate expressions: Use the LCA alias map
375
+ # Note: We don't clear the map here to preserve any parent context aliases
376
+ from snowflake.snowpark_connect.utils.context import register_lca_alias
356
377
 
357
378
  agg_count = get_sql_aggregate_function_count()
358
379
  for exp in aggregate.aggregate_expressions:
359
380
  col = _map_column(exp)
360
381
  raw_aggregations.append(col)
361
382
 
383
+ # If this is an alias, register it in the LCA map for subsequent expressions
384
+ if (
385
+ exp.WhichOneof("expr_type") == "alias"
386
+ and exp.alias.name
387
+ and len(exp.alias.name) > 0
388
+ ):
389
+ alias_name = exp.alias.name[0]
390
+ spark_name, snowpark_column = col
391
+
392
+ # Register the alias pointing to the result of its expression
393
+ # This handles both simple aliases (k as lca) and complex ones (lca + 1 as col)
394
+ # The snowpark_column already contains the computed expression with its alias wrapper,
395
+ # which is fine - when referenced later, the column's value is what gets used
396
+ register_lca_alias(alias_name, snowpark_column)
397
+
362
398
  if is_group_by_all:
363
399
  new_agg_count = get_sql_aggregate_function_count()
364
400
  if new_agg_count == agg_count:
@@ -404,15 +440,20 @@ def map_aggregate(
404
440
  case snowflake_proto.Aggregate.GROUP_TYPE_CUBE:
405
441
  result = input_df.cube(groupings)
406
442
  case snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
407
- # TODO: What do we do about groupings?
408
- sets = (
409
- [
410
- map_expression(exp, input_container.column_map, typer)[1].col
411
- for exp in grouping_sets.grouping_set
412
- ]
413
- for grouping_sets in aggregate.grouping_sets
443
+ # Map each grouping set to columns
444
+ sets_mapped = []
445
+ for grouping_set in aggregate.grouping_sets:
446
+ set_cols = []
447
+ for exp in grouping_set.grouping_set:
448
+ _, typed_col = map_expression(
449
+ exp, input_container.column_map, typer
450
+ )
451
+ set_cols.append(typed_col.col)
452
+ sets_mapped.append(set_cols)
453
+
454
+ result = input_df.group_by_grouping_sets(
455
+ snowpark.GroupingSets(*sets_mapped)
414
456
  )
415
- result = input_df.group_by_grouping_sets(snowpark.GroupingSets(*sets))
416
457
  case other:
417
458
  raise SnowparkConnectNotImplementedError(
418
459
  f"Unsupported GROUP BY type: {other}"
@@ -1,13 +1,18 @@
1
1
  #
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
-
4
+ from collections import Counter
5
5
  from functools import reduce
6
6
 
7
7
  import pyspark.sql.connect.proto.relations_pb2 as relation_proto
8
+ from pyspark.errors.exceptions.base import AnalysisException
8
9
 
9
10
  import snowflake.snowpark.functions as snowpark_fn
10
11
  from snowflake import snowpark
12
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
13
+ quote_name_without_upper_casing,
14
+ unquote_if_quoted,
15
+ )
11
16
  from snowflake.snowpark_connect.column_name_handler import JoinColumnNameMap
12
17
  from snowflake.snowpark_connect.config import global_config
13
18
  from snowflake.snowpark_connect.constants import COLUMN_METADATA_COLLISION_KEY
@@ -17,6 +22,7 @@ from snowflake.snowpark_connect.expression.map_expression import (
17
22
  map_single_column_expression,
18
23
  )
19
24
  from snowflake.snowpark_connect.expression.typer import JoinExpressionTyper
25
+ from snowflake.snowpark_connect.hidden_column import HiddenColumn
20
26
  from snowflake.snowpark_connect.relation.map_relation import (
21
27
  NATURAL_JOIN_TYPE_BASE,
22
28
  map_relation,
@@ -24,7 +30,6 @@ from snowflake.snowpark_connect.relation.map_relation import (
24
30
  from snowflake.snowpark_connect.utils.context import (
25
31
  push_evaluating_join_condition,
26
32
  push_sql_scope,
27
- set_plan_id_map,
28
33
  set_sql_plan_name,
29
34
  )
30
35
  from snowflake.snowpark_connect.utils.telemetry import (
@@ -33,6 +38,9 @@ from snowflake.snowpark_connect.utils.telemetry import (
33
38
 
34
39
  USING_COLUMN_NOT_FOUND_ERROR = "[UNRESOLVED_USING_COLUMN_FOR_JOIN] USING column `{0}` not found on the {1} side of the join. The {1}-side columns: {2}"
35
40
 
41
+ DUPLICATED_JOIN_COL_LSUFFIX = "_left"
42
+ DUPLICATED_JOIN_COL_RSUFFIX = "_right"
43
+
36
44
 
37
45
  def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
38
46
  left_container: DataFrameContainer = map_relation(rel.join.left)
@@ -74,6 +82,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
74
82
 
75
83
  # This handles case sensitivity for using_columns
76
84
  case_corrected_right_columns: list[str] = []
85
+ hidden_columns = set()
86
+ # Propagate the hidden columns from left/right inputs to the result in case of chained joins
87
+ if left_container.column_map.hidden_columns:
88
+ hidden_columns.update(left_container.column_map.hidden_columns)
89
+
90
+ if right_container.column_map.hidden_columns:
91
+ hidden_columns.update(right_container.column_map.hidden_columns)
77
92
 
78
93
  if rel.join.HasField("join_condition"):
79
94
  assert not using_columns
@@ -105,8 +120,8 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
105
120
  right=right_input,
106
121
  on=join_expression.col,
107
122
  how=join_type,
108
- lsuffix="_left",
109
- rsuffix="_right",
123
+ lsuffix=DUPLICATED_JOIN_COL_LSUFFIX,
124
+ rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
110
125
  )
111
126
  elif using_columns:
112
127
  if any(
@@ -156,12 +171,24 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
156
171
  )
157
172
  )
158
173
 
174
+ using_columns_snowpark_names = (
175
+ left_container.column_map.get_snowpark_column_names_from_spark_column_names(
176
+ list(using_columns), return_first=True
177
+ )
178
+ )
179
+
180
+ using_columns_snowpark_types = [
181
+ left_container.dataframe.schema.fields[idx].datatype
182
+ for idx, col in enumerate(left_container.column_map.get_snowpark_columns())
183
+ if col in using_columns_snowpark_names
184
+ ]
185
+
159
186
  # Round trip the using columns through the column map to get the correct names
160
187
  # in order to support case sensitivity.
161
188
  # TODO: case_corrected_left_columns / case_corrected_right_columns may no longer be required as Snowpark dataframe preserves the column casing now.
162
- case_corrected_left_columns = left_container.column_map.get_spark_column_names_from_snowpark_column_names(
163
- left_container.column_map.get_snowpark_column_names_from_spark_column_names(
164
- list(using_columns), return_first=True
189
+ case_corrected_left_columns = (
190
+ left_container.column_map.get_spark_column_names_from_snowpark_column_names(
191
+ using_columns_snowpark_names
165
192
  )
166
193
  )
167
194
  case_corrected_right_columns = right_container.column_map.get_spark_column_names_from_snowpark_column_names(
@@ -195,28 +222,141 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
195
222
  (left == right for left, right in snowpark_using_columns),
196
223
  ),
197
224
  how=join_type,
225
+ rsuffix=DUPLICATED_JOIN_COL_RSUFFIX,
198
226
  )
227
+ # If we disambiguated the snowpark_using_columns during the join, we need to update 'snowpark_using_columns' to
228
+ # use the disambiguated names.
229
+ disambiguated_snowpark_using_columns = []
230
+
231
+ # Ignore disambiguation for LEFT SEMI JOIN and LEFT ANTI JOIN because they drop the right columns, so it'll never disambiguate.
232
+ if join_type in ["leftsemi", "leftanti"]:
233
+ disambiguated_snowpark_using_columns = snowpark_using_columns
234
+ else:
235
+ normalized_joined_columns = [
236
+ unquote_if_quoted(col) for col in joined_df.columns
237
+ ]
238
+ # snowpark_using_columns is a list of tuples of snowpark columns, joined_df.columns is a list of strings of column names
239
+ for (left, right) in snowpark_using_columns:
240
+ normalized_left_name = unquote_if_quoted(left.getName())
241
+ normalized_right_name = unquote_if_quoted(right.getName())
242
+
243
+ # are both left and right in joined_df? if not, it's been disambiguated
244
+ if (
245
+ normalized_left_name in normalized_joined_columns
246
+ and normalized_right_name in normalized_joined_columns
247
+ ):
248
+ # we want to just add this
249
+ disambiguated_snowpark_using_columns.append((left, right))
250
+ else:
251
+ # we need to figure out the disambiguated names and add those - it only disambiguates if left == right
252
+ disambiguated_left: snowpark.Column | None = None
253
+ disambiguated_right: snowpark.Column | None = None
254
+
255
+ for col in normalized_joined_columns:
256
+ quoted_col = f'"{col}"'
257
+ # get the column name and cross check it to see if it ends with the og name
258
+ if col.endswith(normalized_left_name) and col.startswith("l_"):
259
+ disambiguated_left = joined_df[quoted_col]
260
+ elif col.endswith(normalized_right_name) and col.startswith(
261
+ "r_"
262
+ ):
263
+ disambiguated_right = joined_df[quoted_col]
264
+
265
+ # If we have both disambiguated columns, we can break out of the loop to save processing time
266
+ if (
267
+ disambiguated_left is not None
268
+ and disambiguated_right is not None
269
+ ):
270
+ break
271
+ if disambiguated_left is None or disambiguated_right is None:
272
+ raise AnalysisException(
273
+ f"Disambiguated columns not found for {normalized_left_name} and {normalized_right_name}."
274
+ )
275
+ disambiguated_snowpark_using_columns.append(
276
+ (disambiguated_left, disambiguated_right)
277
+ )
278
+
199
279
  # For outer joins, we need to preserve join keys from both sides using COALESCE
280
+ """
281
+ CHANGES:
282
+ - IF CASE
283
+ - Need to drop the using columns
284
+ - Need to create the hidden_columns DF with the using columns from right and left
285
+ - ELSE CASE
286
+ - Need to drop the right side using columns
287
+ - Need to create the hidden_columns DF with the using columns from right
288
+ """
200
289
  if join_type == "full_outer":
201
290
  coalesced_columns = []
202
- columns_to_drop = []
203
- for i, (left_col, right_col) in enumerate(snowpark_using_columns):
291
+ for i, (left_col, _right_col) in enumerate(snowpark_using_columns):
204
292
  # Use the original user-specified column name to preserve case sensitivity
205
- original_column_name = rel.join.using_columns[i]
206
- coalesced_col = snowpark_fn.coalesce(left_col, right_col).alias(
207
- original_column_name
208
- )
293
+ # Use the disambiguated columns for coalescing
294
+ disambiguated_left_col = disambiguated_snowpark_using_columns[i][0]
295
+ disambiguated_right_col = disambiguated_snowpark_using_columns[i][1]
296
+
297
+ coalesced_col = snowpark_fn.coalesce(
298
+ disambiguated_left_col, disambiguated_right_col
299
+ ).alias(left_col.get_name())
209
300
  coalesced_columns.append(coalesced_col)
210
- columns_to_drop.extend([left_col, right_col])
211
301
 
302
+ # Create HiddenColumn objects for each hidden column
303
+ hidden_left = HiddenColumn(
304
+ hidden_snowpark_name=disambiguated_left_col.getName(),
305
+ spark_name=case_corrected_left_columns[i],
306
+ visible_snowpark_name=left_col.get_name(),
307
+ qualifiers=left_container.column_map.get_qualifier_for_spark_column(
308
+ case_corrected_left_columns[i]
309
+ ),
310
+ original_position=left_container.column_map.get_spark_columns().index(
311
+ case_corrected_left_columns[i]
312
+ ),
313
+ )
314
+
315
+ hidden_right = HiddenColumn(
316
+ hidden_snowpark_name=disambiguated_right_col.getName(),
317
+ spark_name=case_corrected_right_columns[i],
318
+ visible_snowpark_name=left_col.get_name(),
319
+ qualifiers=right_container.column_map.get_qualifier_for_spark_column(
320
+ case_corrected_right_columns[i]
321
+ ),
322
+ original_position=right_container.column_map.get_spark_columns().index(
323
+ case_corrected_right_columns[i]
324
+ ),
325
+ )
326
+ hidden_columns.update(
327
+ [
328
+ hidden_left,
329
+ hidden_right,
330
+ ]
331
+ )
332
+
333
+ # All non-hidden columns (not including the coalesced columns)
212
334
  other_columns = [
213
335
  snowpark_fn.col(col_name)
214
336
  for col_name in joined_df.columns
215
- if col_name not in [col.getName() for col in columns_to_drop]
337
+ if col_name not in [col.hidden_snowpark_name for col in hidden_columns]
216
338
  ]
217
339
  result = joined_df.select(coalesced_columns + other_columns)
340
+
218
341
  else:
219
342
  result = joined_df.drop(*(right for _, right in snowpark_using_columns))
343
+ # We never run into the disambiguation case unless it's a full outer join.
344
+ for i, (left_col, right_col) in enumerate(
345
+ disambiguated_snowpark_using_columns
346
+ ):
347
+ # Only right side columns are hidden
348
+ hidden_col = HiddenColumn(
349
+ hidden_snowpark_name=right_col.getName(),
350
+ spark_name=case_corrected_right_columns[i],
351
+ visible_snowpark_name=left_col.getName(),
352
+ qualifiers=right_container.column_map.get_qualifier_for_spark_column(
353
+ case_corrected_right_columns[i]
354
+ ),
355
+ original_position=right_container.column_map.get_spark_columns().index(
356
+ case_corrected_right_columns[i]
357
+ ),
358
+ )
359
+ hidden_columns.add(hidden_col)
220
360
  else:
221
361
  if join_type != "cross" and not global_config.spark_sql_crossJoin_enabled:
222
362
  raise SparkException.implicit_cartesian_product("inner")
@@ -230,35 +370,110 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
230
370
  # - LEFT SEMI JOIN: Returns left rows that have matches in right table (no right columns)
231
371
  # - LEFT ANTI JOIN: Returns left rows that have NO matches in right table (no right columns)
232
372
  # Both preserve only the columns from the left DataFrame without adding any columns from the right.
233
- spark_cols_after_join: list[str] = left_container.column_map.get_spark_columns()
373
+ spark_cols_after_join = left_container.column_map.get_spark_columns()
374
+ snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
375
+ snowpark_col_types = [
376
+ f.datatype for f in left_container.dataframe.schema.fields
377
+ ]
234
378
  qualifiers = left_container.column_map.get_qualifiers()
379
+ elif join_type == "full_outer" and using_columns:
380
+ # We want the coalesced columns to be first, followed by all the left and right columns (excluding using columns)
381
+ spark_cols_after_join: list[str] = []
382
+ snowpark_cols_after_join: list[str] = []
383
+ snowpark_col_types: list[str] = []
384
+
385
+ left_container_snowpark_columns = (
386
+ left_container.column_map.get_snowpark_columns()
387
+ )
388
+ right_container_snowpark_columns = (
389
+ right_container.column_map.get_snowpark_columns()
390
+ )
391
+
392
+ qualifiers = []
393
+ for i in range(len(case_corrected_left_columns)):
394
+ spark_cols_after_join.append(case_corrected_left_columns[i])
395
+ snowpark_cols_after_join.append(using_columns_snowpark_names[i])
396
+ snowpark_col_types.append(using_columns_snowpark_types[i])
397
+ qualifiers.append([])
398
+
399
+ # Handle adding left and right columns, excluding the using columns
400
+ for i, spark_col in enumerate(left_container.column_map.get_spark_columns()):
401
+ if (
402
+ spark_col not in case_corrected_left_columns
403
+ or spark_col in left_container.column_map.get_spark_columns()[:i]
404
+ ):
405
+ spark_cols_after_join.append(spark_col)
406
+ snowpark_cols_after_join.append(left_container_snowpark_columns[i])
407
+ qualifiers.append(
408
+ left_container.column_map.get_qualifier_for_spark_column(spark_col)
409
+ )
410
+
411
+ snowpark_col_types.append(
412
+ left_container.dataframe.schema.fields[i].datatype
413
+ )
414
+
415
+ for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
416
+ if (
417
+ spark_col not in case_corrected_right_columns
418
+ or spark_col in right_container.column_map.get_spark_columns()[:i]
419
+ ):
420
+ spark_cols_after_join.append(spark_col)
421
+ snowpark_cols_after_join.append(right_container_snowpark_columns[i])
422
+ qualifiers.append(
423
+ right_container.column_map.get_qualifier_for_spark_column(spark_col)
424
+ )
425
+
426
+ snowpark_col_types.append(
427
+ right_container.dataframe.schema.fields[i].datatype
428
+ )
429
+
235
430
  else:
236
- # Add Spark columns and plan_ids from left DF
237
- spark_cols_after_join: list[str] = list(
238
- left_container.column_map.get_spark_columns()
239
- ) + [
240
- spark_col
241
- for i, spark_col in enumerate(
242
- right_container.column_map.get_spark_columns()
243
- )
244
- if spark_col not in case_corrected_right_columns
245
- or spark_col
246
- in right_container.column_map.get_spark_columns()[
247
- :i
248
- ] # this is to make sure we only remove the column once
431
+ spark_cols_after_join = left_container.column_map.get_spark_columns()
432
+ snowpark_cols_after_join = left_container.column_map.get_snowpark_columns()
433
+ snowpark_col_types = [
434
+ f.datatype for f in left_container.dataframe.schema.fields
249
435
  ]
250
436
 
251
- qualifiers = list(left_container.column_map.get_qualifiers()) + [
252
- right_container.column_map.get_qualifier_for_spark_column(spark_col)
253
- for i, spark_col in enumerate(
254
- right_container.column_map.get_spark_columns()
437
+ qualifiers = left_container.column_map.get_qualifiers()
438
+
439
+ right_df_snowpark_columns = right_container.column_map.get_snowpark_columns()
440
+
441
+ for i, spark_col in enumerate(right_container.column_map.get_spark_columns()):
442
+ if (
443
+ spark_col not in case_corrected_right_columns
444
+ or spark_col in right_container.column_map.get_spark_columns()[:i]
445
+ ):
446
+ spark_cols_after_join.append(spark_col)
447
+ snowpark_cols_after_join.append(right_df_snowpark_columns[i])
448
+ snowpark_col_types.append(
449
+ right_container.dataframe.schema.fields[i].datatype
450
+ )
451
+
452
+ qualifiers.append(
453
+ right_container.column_map.get_qualifier_for_spark_column(spark_col)
454
+ )
455
+
456
+ snowpark_cols_after_join_deduplicated = []
457
+ snowpark_cols_after_join_counter = Counter(snowpark_cols_after_join)
458
+ seen_duplicated_columns = set()
459
+
460
+ for col in snowpark_cols_after_join:
461
+ if snowpark_cols_after_join_counter[col] == 2:
462
+ # This means that the same column exists twice in the joined df, likely due to a self-join and
463
+ # we need to lsuffix and rsuffix to the names of both columns, similar to what Snowpark did under the hood.
464
+
465
+ suffix = (
466
+ DUPLICATED_JOIN_COL_RSUFFIX
467
+ if col in seen_duplicated_columns
468
+ else DUPLICATED_JOIN_COL_LSUFFIX
255
469
  )
256
- if spark_col not in case_corrected_right_columns
257
- or spark_col
258
- in right_container.column_map.get_spark_columns()[
259
- :i
260
- ] # this is to make sure we only remove the column once]
261
- ]
470
+ unquoted_col = unquote_if_quoted(col)
471
+ quoted = quote_name_without_upper_casing(unquoted_col + suffix)
472
+ snowpark_cols_after_join_deduplicated.append(quoted)
473
+
474
+ seen_duplicated_columns.add(col)
475
+ else:
476
+ snowpark_cols_after_join_deduplicated.append(col)
262
477
 
263
478
  column_metadata = {}
264
479
  if left_container.column_map.column_metadata:
@@ -287,33 +502,13 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
287
502
  result_container = DataFrameContainer.create_with_column_mapping(
288
503
  dataframe=result,
289
504
  spark_column_names=spark_cols_after_join,
290
- snowpark_column_names=result.columns,
505
+ snowpark_column_names=snowpark_cols_after_join_deduplicated,
291
506
  column_metadata=column_metadata,
292
507
  column_qualifiers=qualifiers,
508
+ hidden_columns=hidden_columns,
509
+ snowpark_column_types=snowpark_col_types,
293
510
  )
294
511
 
295
- # Fix for USING join column references with different plan IDs
296
- # After a USING join, references to the right dataframe's columns should resolve
297
- # to the result dataframe that contains the merged columns
298
- if (
299
- using_columns
300
- and rel.join.right.HasField("common")
301
- and rel.join.right.common.HasField("plan_id")
302
- ):
303
- right_plan_id = rel.join.right.common.plan_id
304
- set_plan_id_map(right_plan_id, result_container)
305
-
306
- # For FULL OUTER joins, we also need to map the left dataframe's plan_id
307
- # since both columns are replaced with a coalesced column
308
- if (
309
- using_columns
310
- and join_type == "full_outer"
311
- and rel.join.left.HasField("common")
312
- and rel.join.left.common.HasField("plan_id")
313
- ):
314
- left_plan_id = rel.join.left.common.plan_id
315
- set_plan_id_map(left_plan_id, result_container)
316
-
317
512
  if rel.join.using_columns:
318
513
  # When join 'using_columns', the 'join columns' should go first in result DF.
319
514
  idxs_to_shift = [
@@ -345,6 +540,7 @@ def map_join(rel: relation_proto.Relation) -> DataFrameContainer:
345
540
  cached_schema_getter=lambda: snowpark.types.StructType(
346
541
  reorder(original_df.schema.fields)
347
542
  ),
543
+ hidden_columns=hidden_columns,
348
544
  )
349
545
 
350
546
  return result_container
@@ -46,9 +46,10 @@ def map_map_partitions(
46
46
  udf_check(udf_proto)
47
47
 
48
48
  # Check if this is mapInArrow (eval_type == 207)
49
- eval_type = udf_proto.python_udf.eval_type
50
-
51
- if eval_type == MAP_IN_ARROW_EVAL_TYPE:
49
+ if (
50
+ udf_proto.WhichOneof("function") == "python_udf"
51
+ and udf_proto.python_udf.eval_type == MAP_IN_ARROW_EVAL_TYPE
52
+ ):
52
53
  return _map_in_arrow_with_pandas_udtf(input_container, udf_proto)
53
54
  else:
54
55
  return _map_partitions_with_udf(input_df, udf_proto)
@@ -126,7 +127,11 @@ def _map_partitions_with_udf(
126
127
  "udf_name": "spark_map_partitions_udf",
127
128
  "input_column_names": input_column_names,
128
129
  "replace": True,
129
- "return_type": proto_to_snowpark_type(udf_proto.python_udf.output_type),
130
+ "return_type": proto_to_snowpark_type(
131
+ udf_proto.python_udf.output_type
132
+ if udf_proto.WhichOneof("function") == "python_udf"
133
+ else udf_proto.scalar_scala_udf.outputType
134
+ ),
130
135
  "udf_packages": global_config.get("snowpark.connect.udf.packages", ""),
131
136
  "udf_imports": get_python_udxf_import_files(input_df.session),
132
137
  }