snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879)
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1068 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ import ast
6
+ import json
7
+ import sys
8
+ from collections import defaultdict
9
+
10
+ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
11
+ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
12
+ import pyspark.sql.connect.proto.types_pb2 as types_proto
13
+ from pyspark.errors.exceptions.base import AnalysisException
14
+ from pyspark.serializers import CloudPickleSerializer
15
+
16
+ import snowflake.snowpark.functions as snowpark_fn
17
+ import snowflake.snowpark.types as snowpark_types
18
+ from snowflake import snowpark
19
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
20
+ from snowflake.snowpark._internal.analyzer.expression import (
21
+ Attribute,
22
+ NamedExpression,
23
+ UnresolvedAttribute,
24
+ )
25
+
26
+ # These internal util functions and classes are unlikely to change in Snowpark, so importing them directly
27
+ from snowflake.snowpark._internal.utils import generate_random_alphanumeric
28
+ from snowflake.snowpark.column import Column
29
+ from snowflake.snowpark.table_function import _ExplodeFunctionCall
30
+ from snowflake.snowpark.types import DataType, StructField, StructType, _NumericType
31
+ from snowflake.snowpark_connect.column_name_handler import (
32
+ make_column_names_snowpark_compatible,
33
+ set_schema_getter,
34
+ with_column_map,
35
+ )
36
+ from snowflake.snowpark_connect.config import global_config
37
+ from snowflake.snowpark_connect.error.error_utils import SparkException
38
+ from snowflake.snowpark_connect.expression.map_expression import (
39
+ map_alias,
40
+ map_expression,
41
+ map_single_column_expression,
42
+ )
43
+ from snowflake.snowpark_connect.expression.map_unresolved_function import unwrap_literal
44
+ from snowflake.snowpark_connect.expression.typer import ExpressionTyper
45
+ from snowflake.snowpark_connect.relation.map_relation import map_relation
46
+ from snowflake.snowpark_connect.relation.utils import (
47
+ TYPE_MAP_FOR_TO_SCHEMA,
48
+ snowpark_functions_col,
49
+ )
50
+ from snowflake.snowpark_connect.type_mapping import (
51
+ map_snowpark_to_pyspark_types,
52
+ proto_to_snowpark_type,
53
+ )
54
+ from snowflake.snowpark_connect.typed_column import TypedColumn
55
+ from snowflake.snowpark_connect.utils import context
56
+ from snowflake.snowpark_connect.utils.attribute_handling import (
57
+ split_fully_qualified_spark_name,
58
+ )
59
+ from snowflake.snowpark_connect.utils.context import (
60
+ clear_lca_alias_map,
61
+ register_lca_alias,
62
+ )
63
+ from snowflake.snowpark_connect.utils.udtf_helper import (
64
+ TEST_FLAG_FORCE_CREATE_SPROC,
65
+ create_apply_udtf_in_sproc,
66
+ )
67
+
68
+
69
def map_drop(
    rel: relation_proto.Relation,
) -> snowpark.DataFrame:
    """
    Drop columns from a DataFrame.

    The columns to drop come from two places on ``rel.drop``:

    * ``columns`` - expressions; only ``unresolved_attribute`` expressions are
      considered, and those that fail with ``[COLUMN_NOT_FOUND]`` are ignored.
    * ``column_names`` - plain Spark column names, translated through the
      input DataFrame's column map; names that map to ``None`` are ignored.

    Returns the input DataFrame unchanged when nothing resolvable is left to
    drop, and an ``EmptyDataFrame`` when Snowpark raises
    ``SnowparkColumnException`` (Snowpark refuses to drop every column).
    """
    input_df: snowpark.DataFrame = map_relation(rel.drop.input)
    typer = ExpressionTyper(input_df)
    columns_to_drop_with_names = []
    for exp in rel.drop.columns:
        if exp.WhichOneof("expr_type") == "unresolved_attribute":
            try:
                columns_to_drop_with_names.append(
                    map_single_column_expression(exp, input_df._column_map, typer)
                )
            except AnalysisException as e:
                # Columns that are not found are ignored; any other analysis
                # error is propagated to the caller.
                if "[COLUMN_NOT_FOUND]" not in e.message:
                    raise
    columns_to_drop: list[Column] = [
        col[1].col for col in columns_to_drop_with_names
    ] + [
        snowpark_functions_col(c, input_df._column_map)
        for c in input_df._column_map.get_snowpark_column_names_from_spark_column_names(
            list(rel.drop.column_names)
        )
        if c is not None
    ]
    # Sometimes we get a drop query with only invalid names. In this case, we return
    # the input DataFrame.
    if len(columns_to_drop) == 0:
        return input_df

    def _get_column_names_to_drop() -> list[str]:
        """Return the names of ``columns_to_drop`` as known to the input plan."""
        # more or less copied from Snowpark's DataFrame::drop
        names = []
        for c in columns_to_drop:
            if isinstance(c._expression, Attribute):
                names.append(
                    input_df._plan.expr_to_alias.get(
                        c._expression.expr_id, c._expression.name
                    )
                )
            elif (
                isinstance(c._expression, UnresolvedAttribute)
                and c._expression.df_alias
            ):
                # The alias mapping is read from the DataFrame's plan, exactly
                # like ``expr_to_alias`` in the branch above.
                names.append(
                    input_df._plan.df_aliased_col_name_to_real_col_name.get(
                        c._expression.name, c._expression.name
                    )
                )
            elif isinstance(c._expression, NamedExpression):
                names.append(c._expression.name)
        return names

    # Snowpark doesn't allow dropping all columns, so we have an EmptyDataFrame
    # object to handle these cases.
    try:
        new_columns_names = input_df._column_map.get_snowpark_columns_after_drop(
            _get_column_names_to_drop()
        )
        result: snowpark.DataFrame = input_df.drop(*columns_to_drop)
        return with_column_map(
            result,
            input_df._column_map.get_spark_column_names_from_snowpark_column_names(
                new_columns_names
            ),
            snowpark_column_names=new_columns_names,
            column_qualifiers=input_df._column_map.get_qualifiers_for_columns_after_drop(
                _get_column_names_to_drop()
            ),
            parent_column_name_map=input_df._column_map,
        )
    except snowpark.exceptions.SnowparkColumnException:
        from snowflake.snowpark_connect.empty_dataframe import EmptyDataFrame

        return EmptyDataFrame()
150
+
151
+
152
def map_project(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Project column(s).

    Each projection expression is translated with ``map_expression`` into a
    Snowpark column and the resulting columns are selected from the input
    DataFrame. When the relation has no input, a one-row DataFrame with a
    dummy column is used instead; when it has no expressions, a single NULL
    literal aliased to the empty name is projected.
    """
    if rel.project.HasField("input"):
        input_df = map_relation(rel.project.input)
    else:
        # Stand-in for a OneRowRelation AST node.
        # XXX: Snowflake does not support 0-column tables, so create a dummy column;
        # its name does not seem to show up anywhere.
        active_session = snowpark.Session.get_active_session()
        input_df = with_column_map(
            active_session.create_dataframe([None], ["__DUMMY"]),
            ["__DUMMY"],
            ["__DUMMY"],
        )
    context.set_df_before_projection(input_df)

    expressions: list[expressions_proto.Expression] = rel.project.expressions
    if not expressions:
        # XXX: Snowflake does not support 0-column tables, so create a dummy column;
        # its name will unfortunately be user-visible.
        null_literal = expressions_proto.Expression(
            literal=expressions_proto.Expression.Literal(
                null=types_proto.DataType(null=types_proto.DataType.NULL())
            )
        )
        expressions = [
            expressions_proto.Expression(
                alias=expressions_proto.Expression.Alias(
                    expr=null_literal,
                    name=[""],
                ),
            )
        ]

    # Accumulators describing the projected DataFrame, one entry per output column.
    select_list = []
    new_spark_columns = []
    new_snowpark_columns = []
    column_types = []
    qualifiers = []
    has_multi_column_alias = False

    # LCA support: start from a clean alias map so that later expressions can
    # only reference aliases introduced by this projection.
    clear_lca_alias_map()

    # Explicit aliases, as (spark_name, snowpark_col, aliased_col, alias_types).
    pending_aliases = []

    typer = ExpressionTyper(input_df)

    has_unresolved_star = False
    for candidate in expressions:
        if candidate.WhichOneof("expr_type") == "unresolved_star":
            has_unresolved_star = True
            break

    for exp in expressions:
        new_spark_names, mapper = map_expression(exp, input_df._column_map, typer)

        if len(new_spark_names) != 1 or isinstance(mapper.col, _ExplodeFunctionCall):
            # Multi-column case ('select *', posexplode, explode, inline, etc.)
            has_multi_column_alias = True
            select_list.append(mapper.col)
            result_columns = input_df.select(mapper.col).columns
            new_snowpark_columns.extend(result_columns)
            new_spark_columns.extend(new_spark_names)
            column_types.extend(mapper.types)
            qualifiers.extend(mapper.get_multi_col_qualifiers(len(new_spark_names)))
            continue

        spark_name = new_spark_names[0]

        # The existing Snowpark name may be reused only for a plain column
        # reference: an unresolved attribute without a plan_id (not bound to a
        # specific DataFrame), without a "." qualifier in its identifier, and
        # only when the projection contains no unresolved star.
        may_reuse_name = (
            exp.WhichOneof("expr_type") == "unresolved_attribute"
            and not exp.unresolved_attribute.HasField("plan_id")
            and "." not in exp.unresolved_attribute.unparsed_identifier
            and not has_unresolved_star
        )

        existing_snowpark_name = None
        if may_reuse_name:
            existing_snowpark_name = (
                input_df._column_map.get_snowpark_column_name_from_spark_column_name(
                    spark_name, allow_non_exists=True
                )
            )

        if (
            existing_snowpark_name is not None
            and existing_snowpark_name not in new_snowpark_columns
        ):
            # Found an existing name that has not been used yet: keep it.
            snowpark_column = existing_snowpark_name
        else:
            # Nothing to preserve (or the name is already taken): generate one.
            snowpark_column = make_column_names_snowpark_compatible(
                [spark_name], rel.common.plan_id, len(new_snowpark_columns)
            )[0]

        aliased_col = mapper.col.alias(snowpark_column)
        select_list.append(aliased_col)
        new_snowpark_columns.append(snowpark_column)
        new_spark_columns.append(spark_name)
        column_types.extend(mapper.types)
        qualifiers.append(mapper.get_qualifiers())

        # Only explicit aliases are collected for the intermediate DataFrame
        # and registered for LCA resolution.
        if exp.WhichOneof("expr_type") == "alias":
            pending_aliases.append(
                (spark_name, snowpark_column, aliased_col, mapper.types)
            )

            # Register right away so subsequent expressions can resolve it.
            alias_types = mapper.types
            register_lca_alias(
                spark_name,
                TypedColumn(aliased_col, lambda types=alias_types: types),
            )

    if pending_aliases:
        # LCA case: materialise the aliases on an intermediate DataFrame first,
        # then run the final projection against it.
        old_cols = [snowpark_name for _, snowpark_name, _, _ in pending_aliases]
        new_cols = [alias_col for _, _, alias_col, _ in pending_aliases]
        intermediate_df = input_df.with_columns(old_cols, new_cols)
        result = intermediate_df.select(*select_list)
    else:
        result = input_df.select(*select_list)

    # Apply toDF renaming for multi-column aliasing
    if has_multi_column_alias:
        # Generate snowpark-compatible column names for multi-column aliases
        final_snowpark_columns = make_column_names_snowpark_compatible(
            new_spark_columns, rel.common.plan_id
        )
        result = result.toDF(*final_snowpark_columns)
        new_snowpark_columns = final_snowpark_columns

    return with_column_map(
        result,
        new_spark_columns,
        new_snowpark_columns,
        column_types,
        column_metadata=input_df._column_map.column_metadata,
        column_qualifiers=qualifiers,
        parent_column_name_map=input_df._column_map,
    )
320
+
321
+
322
+ def map_sort(sort: relation_proto.Sort) -> snowpark.DataFrame:
323
+ """
324
+ Implements DataFrame.sort().
325
+ """
326
+ input_df = map_relation(sort.input)
327
+ cols = []
328
+ ascending = [] # Ignored if all order values are set to "unspecified".
329
+ order_specified = False
330
+ typer = ExpressionTyper(input_df)
331
+
332
+ sort_order = sort.order
333
+
334
+ if len(sort_order) == 1:
335
+ parsed_col_name = split_fully_qualified_spark_name(
336
+ sort_order[0].child.unresolved_attribute.unparsed_identifier
337
+ )
338
+ if (
339
+ len(parsed_col_name) == 1
340
+ and parsed_col_name[0].lower() == "all"
341
+ and input_df._column_map.get_snowpark_column_name_from_spark_column_name(
342
+ parsed_col_name[0], allow_non_exists=True
343
+ )
344
+ is None
345
+ ):
346
+ # A single column with the name "all" needs to be expanded to all input columns.
347
+ sort_order = [
348
+ expressions_proto.Expression.SortOrder(
349
+ child=expressions_proto.Expression(
350
+ unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
351
+ unparsed_identifier=col
352
+ )
353
+ ),
354
+ direction=sort_order[0].direction,
355
+ null_ordering=sort_order[0].null_ordering,
356
+ )
357
+ for col in input_df._column_map.get_spark_columns()
358
+ ]
359
+
360
+ for so in sort_order:
361
+ if so.child.HasField("literal"):
362
+ column_index = unwrap_literal(so.child)
363
+ try:
364
+ if column_index <= 0:
365
+ raise IndexError
366
+ col = input_df[column_index - 1]
367
+ except IndexError:
368
+ raise AnalysisException(
369
+ f"""[ORDER_BY_POS_OUT_OF_RANGE] ORDER BY position {column_index} is not in select list (valid range is [1, {len(input_df.columns)})])."""
370
+ )
371
+ else:
372
+ _, typed_column = map_single_column_expression(
373
+ so.child, input_df._column_map, typer
374
+ )
375
+ col = typed_column.col
376
+
377
+ match (so.direction, so.null_ordering):
378
+ case (
379
+ expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
380
+ expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
381
+ ):
382
+ col = col.asc_nulls_first()
383
+ case (
384
+ expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING,
385
+ expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
386
+ ):
387
+ col = col.asc_nulls_last()
388
+ case (
389
+ expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
390
+ expressions_proto.Expression.SortOrder.SORT_NULLS_FIRST,
391
+ ):
392
+ col = col.desc_nulls_first()
393
+ case (
394
+ expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING,
395
+ expressions_proto.Expression.SortOrder.SORT_NULLS_LAST,
396
+ ):
397
+ col = col.desc_nulls_last()
398
+
399
+ cols.append(col)
400
+
401
+ ascending.append(
402
+ so.direction
403
+ == expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
404
+ )
405
+ if (
406
+ so.direction
407
+ != expressions_proto.Expression.SortOrder.SORT_DIRECTION_UNSPECIFIED
408
+ ):
409
+ order_specified = True
410
+
411
+ # TODO: sort.isglobal.
412
+ if not order_specified:
413
+ ascending = None
414
+ result = input_df.sort(cols, ascending=ascending)
415
+ result._column_map = input_df._column_map
416
+ result._table_name = input_df._table_name
417
+ set_schema_getter(result, lambda: input_df.schema)
418
+ return result
419
+
420
+
421
def map_to_df(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Replace all column names of the input DataFrame (DataFrame.toDF).

    The requested Spark names are converted to Snowpark-compatible names, the
    input is renamed with ``toDF`` and a fresh column map is attached. The
    resulting DataFrame is also registered through
    ``context.set_df_before_projection``.

    Raises:
        ValueError: if the number of requested names differs from the number
            of columns in the input's column map.
    """
    source_df: snowpark.DataFrame = map_relation(rel.to_df.input)
    spark_names = list(rel.to_df.column_names)
    if len(spark_names) != len(source_df._column_map.columns):
        # TODO: Check error type here
        raise ValueError(
            "Number of column names must match number of columns in DataFrame"
        )

    snowpark_names = make_column_names_snowpark_compatible(
        spark_names, rel.common.plan_id
    )

    renamed_df = source_df.toDF(*snowpark_names)
    if renamed_df._select_statement is not None:
        # do not allow snowpark to flatten the to_df result
        # TODO: remove after SNOW-2203706 is fixed
        renamed_df._select_statement.flatten_disabled = True

    def _renamed_schema():
        # Same data types as the input, paired positionally with the new names.
        renamed_fields = []
        for new_name, source_field in zip(snowpark_names, source_df.schema.fields):
            renamed_fields.append(
                StructField(new_name, source_field.datatype, _is_column=False)
            )
        return StructType(renamed_fields)

    set_schema_getter(renamed_df, _renamed_schema)
    mapped_df = with_column_map(
        renamed_df,
        spark_names,
        snowpark_column_names=snowpark_names,
    )
    context.set_df_before_projection(mapped_df)
    return mapped_df
459
+
460
+
461
def map_to_schema(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Reconcile the input DataFrame with a target schema (DataFrame.to(schema)).

    Target fields are matched against the input's Spark column names
    case-insensitively. Matched columns are validated (ambiguity, nullability,
    castability) and kept; unmatched target fields are added as NULL literals.
    Finally every output column is cast to the type declared in the target
    schema, and field metadata is parsed and attached to the column map.

    Raises:
        AnalysisException: if a target field matches more than one input
            column, requires a non-nullable column where the input is nullable
            (struct columns excepted), or cannot be cast to the target type.
    """
    input_df: snowpark.DataFrame = map_relation(rel.to_schema.input)
    target_fields = rel.to_schema.schema.struct.fields
    new_column_names = [field.name for field in target_fields]
    snowpark_new_column_names = make_column_names_snowpark_compatible(
        new_column_names, rel.common.plan_id
    )

    # Number of input columns per lower-cased Spark name.
    count_case_insensitive_column_names = {}
    for key, value in input_df._column_map.spark_to_col.items():
        lowered_key = key.lower()
        count_case_insensitive_column_names[lowered_key] = (
            count_case_insensitive_column_names.get(lowered_key, 0) + len(value)
        )

    def _existing_snowpark_name(spark_name: str):
        """Snowpark name of the first input column matching case-insensitively."""
        lowered_name = spark_name.lower()
        for name in input_df._column_map.spark_to_col:
            if name.lower() == lowered_name:
                return input_df._column_map.spark_to_col[name][0].snowpark_name
        return None

    already_existing_columns = [
        column
        for column in new_column_names
        if column.lower() in count_case_insensitive_column_names
    ]

    # Validate every target field that maps onto an existing input column.
    for field in target_fields:
        lowered_field_name = field.name.lower()
        if lowered_field_name not in count_case_insensitive_column_names:
            continue
        match_count = count_case_insensitive_column_names[lowered_field_name]
        if match_count > 1:
            # Report the case-insensitive count; indexing spark_to_col with the
            # exact-case field name could miss the key or under-count.
            raise AnalysisException(
                f"[AMBIGUOUS_COLUMN_OR_FIELD] Column or field `{field.name}` is ambiguous and has {match_count} matches."
            )
        snowpark_name = _existing_snowpark_name(field.name)
        # Check nullable and type casting validation
        for snowpark_field in input_df.schema.fields:
            if snowpark_field.name != snowpark_name:
                continue
            # A nullable input column cannot satisfy a non-nullable target
            # field. PySpark allows nullable to non-nullable conversion for
            # StructType, so struct columns are exempt.
            if (
                not field.nullable
                and snowpark_field.nullable
                and not isinstance(snowpark_field.datatype, StructType)
            ):
                raise AnalysisException(
                    f"[NULLABLE_COLUMN_OR_FIELD] Column or field `{field.name}` is nullable while it's required to be non-nullable."
                )

            # Check type casting validation
            if not _can_cast_column_in_schema(
                snowpark_field.datatype, proto_to_snowpark_type(field.data_type)
            ):
                # NOTE(review): the message reports the target type as the
                # current type and the input type as the required one; this
                # looks swapped relative to Spark's wording - confirm.
                raise AnalysisException(
                    f"""[INVALID_COLUMN_OR_FIELD_DATA_TYPE] Column or field `{field.name}` is of type "{map_snowpark_to_pyspark_types(proto_to_snowpark_type(field.data_type))}" while it's required to be "{map_snowpark_to_pyspark_types(snowpark_field.datatype)}"."""
                )

    if len(already_existing_columns) == len(new_column_names):
        # All columns already exist, we're doing a simple update.
        snowpark_new_column_names = [
            _existing_snowpark_name(column) for column in new_column_names
        ]
        result = input_df
    elif len(already_existing_columns) == 0:
        # All schema columns are new, drop all old columns and add the new ones.
        new_columns = [
            snowpark_fn.lit(None).alias(column_name)
            for column_name in snowpark_new_column_names
        ]
        result = input_df.select(*new_columns)
    else:
        # Some columns already exist, some columns are new.
        columns_to_add = set()
        # This list is created to preserve ordering
        reconciled_snowpark_names = []
        for spark_column, snowpark_column in zip(
            new_column_names, snowpark_new_column_names
        ):
            if spark_column.lower() in count_case_insensitive_column_names:
                # Existing column: reuse its original Snowpark name.
                reconciled_snowpark_names.append(
                    _existing_snowpark_name(spark_column)
                )
            else:
                # New column: it has to be created under its new Snowpark name.
                columns_to_add.add(snowpark_column)
                reconciled_snowpark_names.append(snowpark_column)
        # Add all columns introduced by the new schema.
        new_columns = [
            (
                snowpark_fn.lit(None).alias(column_name)
                if column_name in columns_to_add
                else column_name
            )
            for column_name in reconciled_snowpark_names
        ]
        result = input_df.select(*new_columns)
        snowpark_new_column_names = reconciled_snowpark_names

    new_schema = rel.to_schema.schema
    snowpark_schema: snowpark.types.StructType = proto_to_snowpark_type(new_schema)
    # Cast every output column to the type requested by the target schema.
    result_with_casting = result.select(
        *[
            snowpark_fn.cast(col_name, snowpark_field.datatype).as_(col_name)
            for col_name, snowpark_field in zip(
                snowpark_new_column_names, snowpark_schema.fields
            )
        ]
    )

    # Field metadata arrives as text; unparsable or absent metadata becomes None.
    column_metadata = {}
    for field in target_fields:
        if field.metadata:
            try:
                column_metadata[field.name] = ast.literal_eval(field.metadata)
            except (ValueError, SyntaxError):
                column_metadata[field.name] = None
        else:
            column_metadata[field.name] = None
    return with_column_map(
        result_with_casting,
        new_column_names,
        snowpark_column_names=snowpark_new_column_names,
        snowpark_column_types=[field.datatype for field in snowpark_schema.fields],
        column_metadata=column_metadata,
        parent_column_name_map=input_df._column_map,
    )
592
+
593
+
594
def map_with_columns_renamed(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Rename columns in a DataFrame.

    Only the Spark-facing names change: the result selects ``*`` from the input
    and keeps the input's Snowpark column names and qualifiers. The effective
    rename map (the requested renames merged with the input's existing rename
    chains) is stored on the result's column map as ``rename_chains``.

    When ``spark.sql.caseSensitive`` is off, lookups use lower-cased names and
    the original spelling of the requested names is restored in the output.

    Raises:
        AnalysisException: if two requested renames target the same new name.
    """
    input_df: snowpark.DataFrame = map_relation(rel.with_columns_renamed.input)
    requested_renames = dict(rel.with_columns_renamed.rename_columns_map)
    case_sensitive = global_config.spark_sql_caseSensitive

    rename_columns_map = dict(requested_renames)
    # lower-cased name -> spelling used in the request (case-insensitive mode).
    rename_columns_map_original = {}
    if not case_sensitive:
        # store it as lower case to avoid case sensitivity issues.
        for k, v in requested_renames.items():
            rename_columns_map_original[k.lower()] = k
            rename_columns_map_original[v.lower()] = v
        rename_columns_map = {
            k.lower(): v.lower() for k, v in requested_renames.items()
        }

    # re-construct the rename chains based on the input dataframe.
    if input_df._column_map.rename_chains:
        for key, value in input_df._column_map.rename_chains.items():
            if key in rename_columns_map:
                # This is to handle the case where the same column is renamed multiple times.
                # df.withColumnRenamed("a", "b").withColumnRenamed("a", "c")
                # the result rename chain should be {"a" -> "c", "b" -> "c"}
                rename_columns_map[value] = rename_columns_map[key]
            elif value in rename_columns_map:
                # This is to update historic rename chain.
                # df.withColumnRenamed("a", "b").withColumnRenamed("b", "c")
                # The rename chain "a" -> "b" should be updated to "a" -> "c" as b was renamed to c in the second rename.
                # final rename chain should be {"a" -> "c", "b" -> "c"}
                rename_columns_map[key] = rename_columns_map[value]
            else:
                # This just copies the renames from previous computed dataframe
                rename_columns_map[key] = value

    existing_columns = input_df._column_map.get_spark_columns()

    # Validate for naming conflicts among the requested new names.
    seen = set()
    for new_name in requested_renames.values():
        normalized_name = new_name if case_sensitive else new_name.lower()
        if normalized_name in seen:
            raise AnalysisException(
                f"[COLUMN_ALREADY_EXISTS] The column `{new_name}` already exists. Consider to choose another name or rename the existing column."
            )
        seen.add(normalized_name)

    new_columns = []
    for c in existing_columns:
        if case_sensitive:
            new_columns.append(rename_columns_map.get(c, c))
            continue
        target = rename_columns_map.get(c.lower(), None)
        if target is None:
            new_columns.append(c)
        else:
            # Targets copied from an earlier rename chain are not part of this
            # request, so they have no recorded spelling; fall back to the
            # target itself instead of emitting None as a column name.
            new_columns.append(rename_columns_map_original.get(target, target))

    # Creating a new df to avoid updating the state of cached dataframe.
    new_df = input_df.select("*")
    result_df = with_column_map(
        new_df,
        new_columns,
        input_df._column_map.get_snowpark_columns(),
        column_qualifiers=input_df._column_map.get_qualifiers(),
        parent_column_name_map=input_df._column_map.get_parent_column_name_map(),
    )
    result_df._column_map.rename_chains = rename_columns_map

    return result_df
669
+
670
+
671
def map_with_columns(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Add columns to a DataFrame, or replace existing ones.

    Each alias in ``rel.with_columns.aliases`` must carry exactly one name. If
    that name already exists in the input, every input column with that Spark
    name is replaced by the new expression; otherwise a new column is appended
    under a freshly generated Snowpark-compatible name. Aliases that carry
    metadata update the column metadata of the result.

    Raises:
        ValueError: if the same (normalized) column name is given twice.
        KeyError: if an existing Spark name resolves to no Snowpark column.
    """
    input_df: snowpark.DataFrame = map_relation(rel.with_columns.input)
    with_columns = [
        map_alias(alias, input_df._column_map, ExpressionTyper(input_df))
        for alias in rel.with_columns.aliases
    ]
    # TODO: This list needs to contain all unique column names, but the code below doesn't
    # guarantee that.
    with_columns_names = []
    with_columns_exprs = []
    with_columns_types = []
    # Position used when generating the Snowpark name of a brand-new column.
    with_column_offset = len(input_df._column_map.get_spark_columns())
    new_spark_names = []
    seen_columns = set()
    for names_list, expr in with_columns:
        assert (
            len(names_list) == 1
        ), f"Expected single column name, got {len(names_list)}: {names_list}"
        name = names_list[0]
        name_normalized = input_df._column_map._normalized_spark_name(name)
        if name_normalized in seen_columns:
            raise ValueError(
                f"[COLUMN_ALREADY_EXISTS] The column `{name}` already exists."
            )
        seen_columns.add(name_normalized)
        # If the column name is already in the DataFrame, we replace it, so we use the
        # mapping to get the correct column name.
        if input_df._column_map.has_spark_column(name):
            all_instances_of_spark_column_name = (
                input_df._column_map.get_snowpark_column_names_from_spark_column_names(
                    [name]
                )
            )
            instance_count = len(all_instances_of_spark_column_name)
            if instance_count == 0:
                raise KeyError(f"Spark column name {name} does not exist")
            # The same expression replaces every column carrying this Spark name.
            with_columns_names.extend(all_instances_of_spark_column_name)
            with_columns_exprs.extend([expr.col] * instance_count)
            with_columns_types.extend(expr.types * instance_count)
            new_spark_names.extend([name] * instance_count)
        else:
            with_columns_names.append(
                make_column_names_snowpark_compatible(
                    [name], rel.common.plan_id, with_column_offset
                )[0]
            )
            with_column_offset += 1
            with_columns_exprs.append(expr.col)
            with_columns_types.extend(expr.types)
            new_spark_names.append(name)

    (
        new_spark_columns,
        new_snowpark_columns,
        qualifiers,
    ) = input_df._column_map.with_columns(new_spark_names, with_columns_names)

    # dedup the change in columns at snowpark name level, this is required by the with columns functions
    with_columns_names_deduped = []
    with_columns_exprs_deduped = []
    with_columns_types_deduped = []
    seen = set()
    for i, col_name in enumerate(with_columns_names):
        if col_name not in seen:
            seen.add(col_name)
            with_columns_names_deduped.append(col_name)
            with_columns_exprs_deduped.append(with_columns_exprs[i])
            with_columns_types_deduped.append(with_columns_types[i])
    result = input_df.with_columns(
        with_columns_names_deduped, with_columns_exprs_deduped
    ).select(*new_snowpark_columns)

    # Types of the new/replaced columns override those of the input schema.
    snowpark_name_to_type = dict(
        [(f.name, f.datatype) for f in input_df.schema.fields]
        + list(zip(with_columns_names, with_columns_types))
    )

    # Work on a copy: writing into the input's own metadata dict would leak
    # withMetadata changes back into the parent DataFrame.
    column_metadata = dict(input_df._column_map.column_metadata or {})
    for alias in rel.with_columns.aliases:
        # this logic is triggered for df.withMetadata function.
        if alias.HasField("metadata") and len(alias.metadata.strip()) > 0:
            # spark sends list of alias names with only one element in the list with alias name.
            column_metadata[alias.name[0]] = json.loads(alias.metadata)

    return with_column_map(
        result,
        new_spark_columns,
        snowpark_column_names=new_snowpark_columns,
        snowpark_column_types=[
            snowpark_name_to_type.get(n) for n in new_snowpark_columns
        ],
        column_metadata=column_metadata,
        column_qualifiers=qualifiers,
        parent_column_name_map=input_df._column_map,
    )
772
+
773
+
774
def map_unpivot(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Implements DataFrame.unpivot() on top of Snowpark's unpivot.

    The input is first projected (id columns are aliased to random names and
    value columns are optionally cast to DOUBLE), then unpivoted, then
    projected back so that id columns regain their names and the variable
    column holds Spark column names instead of Snowpark ones.

    Raises:
        SparkException: if there is no value column to unpivot (an empty
            ``values`` list, or no ``values`` and every column used as an id),
            or if the value columns share no common type ancestor.
    """
    # Spark API: df.unpivot([id_columns], [unpivot_columns], var_column, val_column)
    # Snowpark API: df.unpivot(val_column, var_column, [unpivot_columns])
    if rel.unpivot.HasField("values") and len(rel.unpivot.values.values) == 0:
        raise SparkException.unpivot_requires_value_columns()

    def get_least_common_ancestor_classes(
        types: list[snowpark.types.DataType],
    ) -> set:
        """Classes shared by the MRO of every given type, minus the generic roots."""
        mro_sets = [set(data_type.__class__.mro()) for data_type in types]
        common_ancestors = set.intersection(*mro_sets)
        common_ancestors.discard(object)
        common_ancestors.discard(snowpark.types._AtomicType)
        common_ancestors.discard(snowpark.types.DataType)
        return common_ancestors

    def should_cast_type(df: snowpark.DataFrame, col_names: list[str]) -> bool:
        """Tell whether the value columns have mixed types including a numeric one."""
        # TODO: Follow the Spark type casting semantics and cast input columns to their common parent type.
        # Snowpark unpivot cannot handle columns with different types. For example, GS throws error
        # CONFLICTING_UNPIVOT_COLUMN_TYPES if unpivot_col_names contains an int column and a double column.
        # But Spark unpivot is able to handle such cases.
        # This function only handles the case where the column list contains more than one numerical types.

        type_column_list = [
            (
                f.datatype,
                df._column_map.get_spark_column_name_from_snowpark_column_name(
                    snowpark_functions_col(f.name, df._column_map).get_name()
                ),
            )
            for f in df.schema.fields
            if snowpark_functions_col(f.name, df._column_map).get_name() in col_names
        ]
        type_iter, _ = zip(*type_column_list)
        type_list = list(type_iter)
        is_same_type = len(set(type_list)) <= 1
        contains_numeric_type = any(
            [isinstance(t, snowpark_types._NumericType) for t in type_list]
        )
        if not get_least_common_ancestor_classes(type_list):
            # TODO: match exactly how spark shows mismatched columns
            raise SparkException.unpivot_value_data_type_mismatch(
                ", ".join(
                    [
                        f"{dtype} {column_name}"
                        for (dtype, column_name) in type_column_list
                    ]
                )
            )
        return not is_same_type and contains_numeric_type

    def get_column_names(
        relation: relation_proto.Relation, df: snowpark.DataFrame
    ) -> tuple[list[str], list[str], list[str], list[str]]:
        """This function takes the input Snowpark dataframe and the input relation,
        and returns the Snowpark and Spark column names.

        Returns:
            spark_columns: contains the Spark column names in the result
            id_col_names: contains the Snowpark id column names
            unpivot_col_names: contains the Snowpark unpivot column names
            unpivot_spark_names: contains the Spark unpivot column names
        """
        spark_columns = []
        id_col_names = []
        typer = ExpressionTyper(df)
        for id_col in relation.unpivot.ids:
            spark_name, typed_column = map_single_column_expression(
                id_col, df._column_map, typer
            )
            id_col_names.append(typed_column.col.get_name())
            spark_columns.append(spark_name)

        # unpivot_col_names contains the Snowpark column names sent to GS.
        # unpivot_spark_name contains the Spark column names.
        unpivot_col_names = []
        unpivot_spark_names = []
        for v in relation.unpivot.values.values:
            spark_name, typed_column = map_single_column_expression(
                v, df._column_map, typer
            )
            unpivot_col_names.append(typed_column.col.get_name())
            unpivot_spark_names.append(spark_name)

        if not relation.unpivot.HasField("values"):
            # When `values` is `None`, all non-id columns will be unpivoted.
            for snowpark_name, spark_name in zip(
                df._column_map.get_snowpark_columns(),
                df._column_map.get_spark_columns(),
            ):
                resolved_name = snowpark_functions_col(
                    snowpark_name, df._column_map
                ).get_name()
                if resolved_name not in id_col_names:
                    unpivot_col_names.append(resolved_name)
                    unpivot_spark_names.append(spark_name)

        spark_columns.append(relation.unpivot.variable_column_name)
        spark_columns.append(relation.unpivot.value_column_name)
        return spark_columns, id_col_names, unpivot_col_names, unpivot_spark_names

    input_df: snowpark.DataFrame = map_relation(rel.unpivot.input)
    (
        spark_columns,
        id_col_names,
        unpivot_col_names,
        unpivot_spark_names,
    ) = get_column_names(rel, input_df)
    if not unpivot_col_names:
        # `values` was omitted and every input column is an id column, so
        # nothing is left to unpivot; report it like the explicit empty case.
        raise SparkException.unpivot_requires_value_columns()
    (
        snowpark_value_column_name,
        snowpark_variable_column_name,
    ) = make_column_names_snowpark_compatible(
        [rel.unpivot.value_column_name, rel.unpivot.variable_column_name],
        rel.common.plan_id,
        len(spark_columns),
    )
    cast_type = should_cast_type(input_df, unpivot_col_names)

    # column_project is the project that happens before unpivot. This projection is used to
    # 1. preserve the id column, by projecting the id column to a random name.
    # 2. perform type casting of the unpivot columns.
    # column_reverse_project is the project that happens after unpivot. This project is used to
    # 1. project the id column from the random name back to the original name.
    # 2. perform case when postprocessing to fix the column names in the var column.
    column_project = []
    column_reverse_project = []
    snowpark_columns = []
    qualifiers = []
    # NOTE(review): id columns are emitted here in input-column order, while
    # spark_columns lists them in the order given by the request - confirm the
    # two always agree.
    for c in input_df._column_map.get_snowpark_columns():
        c_name = snowpark_functions_col(c, input_df._column_map).get_name()
        if c_name in unpivot_col_names:
            if cast_type:
                column_project.append(
                    snowpark_functions_col(c, input_df._column_map)
                    .cast("DOUBLE")
                    .alias(c_name)
                )
            else:
                column_project.append(snowpark_functions_col(c, input_df._column_map))
        if c_name in id_col_names:
            id_col_alias = "SES" + generate_random_alphanumeric().upper()
            column_project.append(
                snowpark_functions_col(c, input_df._column_map).alias(id_col_alias)
            )
            column_reverse_project.append(
                snowpark_functions_col(id_col_alias, input_df._column_map).alias(c)
            )
            snowpark_columns.append(c)
            # NOTE(review): `c` is a Snowpark column name but the helper is
            # named for Spark columns - confirm this lookup is intended.
            qualifiers.append(input_df._column_map.get_qualifier_for_spark_column(c))

    # Without the case when postprocessing, the result Spark dataframe is:
    # +---+------------+------+
    # |id | var        | val  |
    # +---+------------+------+
    # | 1 | INTSES1    | 10.0 |
    # | 1 | DOUBLESES2 | 1.0  |
    # +---+------------+------+
    # which has wrong column names in the var column. The correct column names should be:
    # +---+--------+------+
    # |id | var    | val  |
    # +---+--------+------+
    # | 1 | int    | 10.0 |
    # | 1 | double | 1.0  |
    # +---+--------+------+
    # We need a case when postprocessing to convert the value in the var column.
    post_process_variable_column = None
    for snowpark_name, spark_name in zip(unpivot_col_names, unpivot_spark_names):
        matches_snowpark_name = snowpark_functions_col(
            snowpark_variable_column_name, input_df._column_map
        ) == unquote_if_quoted(snowpark_name)
        if post_process_variable_column is None:
            post_process_variable_column = snowpark_fn.when(
                matches_snowpark_name, spark_name
            )
        else:
            post_process_variable_column = post_process_variable_column.when(
                matches_snowpark_name, spark_name
            )

    column_reverse_project.append(
        post_process_variable_column.alias(snowpark_variable_column_name)
    )
    snowpark_columns.append(snowpark_variable_column_name)
    column_reverse_project.append(
        snowpark_functions_col(snowpark_value_column_name, input_df._column_map)
    )
    snowpark_columns.append(snowpark_value_column_name)
    # The variable and value columns carry no qualifiers.
    qualifiers.extend([[]] * 2)

    result = (
        input_df.select(*column_project)
        .unpivot(
            snowpark_value_column_name,
            snowpark_variable_column_name,
            unpivot_col_names,
            include_nulls=True,
        )
        .select(*column_reverse_project)
    )
    return with_column_map(
        result,
        spark_columns,
        snowpark_columns,
        column_qualifiers=qualifiers,
        parent_column_name_map=input_df._column_map,
    )
985
+
986
+
987
def map_group_map(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Apply a Python function to each group of the input DataFrame.

    The input is grouped by ``rel.group_map.grouping_expressions``. If the
    client's Python major.minor version matches the running interpreter (and
    ``TEST_FLAG_FORCE_CREATE_SPROC`` is not set), the unpickled function is run
    through ``apply_in_pandas``; otherwise a table function is created with
    ``create_apply_udtf_in_sproc`` and invoked partitioned by the grouping
    expressions.

    Raises:
        ValueError: if the relation does not carry a Python UDF.
    """
    input_df: snowpark.DataFrame = map_relation(rel.group_map.input)
    grouping_expressions = rel.group_map.grouping_expressions
    snowpark_grouping_expressions: list[snowpark.Column] = []
    typer = ExpressionTyper(input_df)
    group_name_list: list[str] = []
    qualifiers = []
    for exp in grouping_expressions:
        new_name, snowpark_column = map_single_column_expression(
            exp, input_df._column_map, typer
        )
        snowpark_grouping_expressions.append(snowpark_column.col)
        group_name_list.append(new_name)
        qualifiers.append(snowpark_column.get_qualifiers())
    # Protobuf message fields are never None, so presence has to be checked
    # with HasField rather than an identity comparison.
    if not rel.group_map.func.HasField("python_udf"):
        raise ValueError("group_map relation without python udf is not supported")

    python_udf = rel.group_map.func.python_udf
    # Only major.minor matters; tolerate a trailing patch component.
    python_major, python_minor = python_udf.python_ver.split(".")[:2]
    is_compatible_python = sys.version_info.major == int(
        python_major
    ) and sys.version_info.minor == int(python_minor)

    output_type = proto_to_snowpark_type(python_udf.output_type)

    if not is_compatible_python or TEST_FLAG_FORCE_CREATE_SPROC:
        original_columns = None
        if input_df._column_map is not None:
            original_columns = [
                column.spark_name for column in input_df._column_map.columns
            ]

        apply_udtf_temp_name = create_apply_udtf_in_sproc(
            python_udf,
            rel.group_map.func.function_name,
            snowpark_grouping_expressions,
            original_columns,
            input_df.schema,
        )

        group_by_df = input_df.group_by(*snowpark_grouping_expressions)
        inner_df = group_by_df._dataframe

        result = inner_df.select(
            snowpark_fn.call_table_function(
                apply_udtf_temp_name, *inner_df.columns
            ).over(partition_by=snowpark_grouping_expressions)
        )
    else:
        # SECURITY NOTE: this unpickles a payload supplied by the client, which
        # executes arbitrary code in this process; it must only be reached for
        # trusted clients.
        (
            callable_func,
            _,
        ) = CloudPickleSerializer().loads(python_udf.command)
        result = input_df.group_by(*snowpark_grouping_expressions).apply_in_pandas(
            callable_func, output_type
        )

    # Pad with empty qualifiers for the result columns beyond the grouping keys.
    qualifiers.extend([[]] * (len(result.columns) - len(group_name_list)))
    return with_column_map(
        result,
        [field.name for field in output_type],
        result.columns,
        column_qualifiers=qualifiers,
        parent_column_name_map=input_df._column_map,
    )
1054
+
1055
+
1056
def _can_cast_column_in_schema(
    initial_column_type: DataType, column_type_to_cast_to: DataType
) -> bool:
    """
    Tell whether DataFrame.to(schema) may cast one column type to another.

    The allowed targets are looked up in ``TYPE_MAP_FOR_TO_SCHEMA``. All
    numeric source types share the single ``_NumericType`` entry; every other
    source type is looked up by its own class. There is a table tracking the
    supported casts in the test_dataframe_to.py file.
    """
    if isinstance(initial_column_type, _NumericType):
        lookup_key = _NumericType
    else:
        lookup_key = type(initial_column_type)

    for allowed_target in TYPE_MAP_FOR_TO_SCHEMA[lookup_key]:
        if isinstance(column_type_to_cast_to, allowed_target):
            return True
    return False