snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879) hide show
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1874 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ import re
6
+ from collections.abc import MutableMapping, MutableSequence
7
+ from contextlib import contextmanager
8
+ from contextvars import ContextVar
9
+ from functools import reduce
10
+
11
+ import jpype
12
+ import pandas
13
+ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
14
+ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
15
+ import sqlglot
16
+ from google.protobuf.any_pb2 import Any
17
+ from pyspark.errors.exceptions.base import AnalysisException
18
+ from sqlglot.expressions import ColumnDef, DataType, FileFormatProperty, Identifier
19
+
20
+ import snowflake.snowpark.functions as snowpark_fn
21
+ import snowflake.snowpark_connect.proto.snowflake_expression_ext_pb2 as snowflake_exp_proto
22
+ import snowflake.snowpark_connect.proto.snowflake_relation_ext_pb2 as snowflake_proto
23
+ from snowflake import snowpark
24
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
25
+ quote_name_without_upper_casing,
26
+ unquote_if_quoted,
27
+ )
28
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
29
+ from snowflake.snowpark._internal.utils import is_sql_select_statement
30
+ from snowflake.snowpark.functions import when_matched, when_not_matched
31
+ from snowflake.snowpark_connect.config import (
32
+ auto_uppercase_ddl,
33
+ get_boolean_session_config_param,
34
+ global_config,
35
+ set_config_param,
36
+ unset_config_param,
37
+ )
38
+ from snowflake.snowpark_connect.expression.map_expression import (
39
+ ColumnNameMap,
40
+ map_single_column_expression,
41
+ )
42
+ from snowflake.snowpark_connect.expression.typer import ExpressionTyper
43
+ from snowflake.snowpark_connect.relation.catalogs.utils import (
44
+ CURRENT_CATALOG_NAME,
45
+ _get_current_temp_objects,
46
+ )
47
+ from snowflake.snowpark_connect.relation.map_relation import (
48
+ NATURAL_JOIN_TYPE_BASE,
49
+ map_relation,
50
+ )
51
+ from snowflake.snowpark_connect.type_mapping import map_snowpark_to_pyspark_types
52
+ from snowflake.snowpark_connect.utils.context import (
53
+ _accessing_temp_object,
54
+ gen_sql_plan_id,
55
+ get_session_id,
56
+ push_evaluating_sql_scope,
57
+ push_sql_scope,
58
+ set_plan_id_map,
59
+ set_sql_args,
60
+ set_sql_plan_name,
61
+ )
62
+ from snowflake.snowpark_connect.utils.session import get_or_create_snowpark_session
63
+ from snowflake.snowpark_connect.utils.telemetry import (
64
+ SnowparkConnectNotImplementedError,
65
+ )
66
+
67
+ from .. import column_name_handler
68
+ from ..expression.map_sql_expression import (
69
+ _window_specs,
70
+ as_java_list,
71
+ as_java_map,
72
+ map_logical_plan_expression,
73
+ sql_parser,
74
+ )
75
+
76
# Context-local mapping from a name to a Spark Connect relation proto
# (presumably CTE name -> CTE definition; verify against the code that fills it).
# NOTE: the default dict is a single shared object; _push_cte_scope() installs a
# copy before a nested scope runs, so writes made inside that scope do not reach it.
_ctes = ContextVar[dict[str, relation_proto.Relation]]("_ctes", default={})
77
+
78
+
79
+ def _is_sql_select_statement_helper(sql_string: str) -> bool:
80
+ """
81
+ Determine if a SQL string is a SELECT or CTE query statement, even when it starts with comments or whitespace.
82
+ """
83
+ if not sql_string:
84
+ return False
85
+
86
+ trimmed = sql_string.lstrip()
87
+
88
+ while trimmed:
89
+ if trimmed.startswith("--"):
90
+ newline_pos = trimmed.find("\n")
91
+ if newline_pos == -1:
92
+ return False
93
+ trimmed = trimmed[newline_pos + 1 :].lstrip()
94
+ continue
95
+ elif trimmed.startswith("/*"):
96
+ end_pos = trimmed.find("*/")
97
+ if end_pos == -1:
98
+ return False
99
+ trimmed = trimmed[end_pos + 2 :].lstrip()
100
+ continue
101
+ break
102
+
103
+ if not trimmed:
104
+ return False
105
+
106
+ return is_sql_select_statement(trimmed)
107
+
108
+
109
@contextmanager
def _push_cte_scope():
    """
    Open a fresh CTE scope, used when evaluating nested WITH clauses.

    A copy of the current ``_ctes`` mapping is installed for the duration of the
    ``with`` body; the previous value is restored on exit, including when the body
    raises.
    """
    scoped_ctes = _ctes.get().copy()
    reset_token = _ctes.set(scoped_ctes)
    try:
        yield
    finally:
        _ctes.reset(reset_token)
120
+
121
+
122
@contextmanager
def _push_window_specs_scope():
    """
    Open a fresh window-specs scope, used when evaluating nested clauses.

    A copy of the current ``_window_specs`` value is installed for the duration of
    the ``with`` body; the previous value is restored on exit, including when the
    body raises.
    """
    scoped_specs = _window_specs.get().copy()
    reset_token = _window_specs.set(scoped_specs)
    try:
        yield
    finally:
        _window_specs.reset(reset_token)
133
+
134
+
135
+ def _find_pos_args(node, positions: list[int]):
136
+ if str(node.nodeName()) == "PosParameter":
137
+ positions.append(node.pos())
138
+ else:
139
+ for child in as_java_list(node.children()):
140
+ _find_pos_args(child, positions)
141
+ if hasattr(node, "expressions"):
142
+ for child in as_java_list(node.expressions()):
143
+ _find_pos_args(child, positions)
144
+
145
+
146
def parse_pos_args(
    logical_plan,
    pos_args: MutableSequence[expressions_proto.Expression.Literal],
) -> dict[int, expressions_proto.Expression]:
    """
    Map each positional parameter's location in the query to its supplied value.

    Spark Connect delivers positional parameters as a plain list, whereas the Spark
    parser identifies them by their character index in the query text. The
    locations found in ``logical_plan`` are therefore sorted and paired, in order,
    with the entries of ``pos_args``.

    Returns an empty dict when ``pos_args`` is empty or None.
    """
    if not pos_args:
        return {}

    found: list[int] = []
    _find_pos_args(logical_plan, found)
    found.sort()
    return {position: literal for position, literal in zip(found, pos_args)}
160
+
161
+
162
def execute_logical_plan(logical_plan) -> snowpark.DataFrame:
    """
    Convert a parsed logical plan to a relation proto and map it to a DataFrame.

    The conversion (``map_logical_plan_relation``) runs first; the mapping
    (``map_relation``) runs inside ``push_evaluating_sql_scope()``.
    """
    relation = map_logical_plan_relation(logical_plan)
    with push_evaluating_sql_scope():
        return map_relation(relation)
166
+
167
+
168
def _spark_to_snowflake_single_id(name: str) -> str:
    """
    Quote a single identifier part, upper-casing it when DDL auto-uppercase is on.

    The name is quoted with ``quote_name_without_upper_casing``; the quoted result
    is upper-cased only if ``auto_uppercase_ddl()`` is truthy.
    """
    quoted = quote_name_without_upper_casing(name)
    if auto_uppercase_ddl():
        return quoted.upper()
    return quoted
171
+
172
+
173
def _spark_to_snowflake(multipart_id: jpype.JObject) -> str:
    """
    Convert a multi-part identifier to a dot-separated, quoted string.

    Each part is stringified and passed through ``_spark_to_snowflake_single_id``.
    """
    converted_parts = [
        _spark_to_snowflake_single_id(str(part)) for part in as_java_list(multipart_id)
    ]
    return ".".join(converted_parts)
177
+
178
+
179
def _rename_columns(
    df: snowpark.DataFrame, user_specified_columns
) -> snowpark.DataFrame:
    """
    Rename the columns of ``df`` to their target identifiers.

    When ``user_specified_columns`` yields any entries, the first element (``_1()``)
    of each is paired positionally with ``df.columns``. Otherwise the pairs come
    from ``df._column_map.snowpark_to_spark_map()``. Every target name is passed
    through ``_spark_to_snowflake_single_id`` before renaming.
    """
    requested = [str(entry._1()) for entry in as_java_list(user_specified_columns)]

    rename_pairs = (
        zip(df.columns, requested)
        if requested
        else df._column_map.snowpark_to_spark_map().items()
    )

    renamed = df
    for current_name, target_name in rename_pairs:
        renamed = renamed.with_column_renamed(
            current_name, _spark_to_snowflake_single_id(target_name)
        )
    return renamed
195
+
196
+
197
def _create_table_as_select(logical_plan, mode: str) -> None:
    """
    Execute the plan's query and save the result as a table.

    The result columns are renamed from their Snowpark names to the identifiers
    derived from their Spark names, then written with ``save_as_table`` under the
    plan's table name, using ``mode`` and the table-spec comment (if any).
    """
    # TODO: for as select create tables we'd map multi layer identifier here
    table_name = get_relation_identifier_name(logical_plan.name())
    comment_opt = logical_plan.tableSpec().comment()

    result_df = execute_logical_plan(logical_plan.query())
    name_pairs = result_df._column_map.snowpark_to_spark_map().items()
    for snowpark_name, spark_name in name_pairs:
        result_df = result_df.with_column_renamed(
            snowpark_name, _spark_to_snowflake_single_id(spark_name)
        )

    # TODO escaping should be handled by snowpark. remove when SNOW-2210271 is done
    if comment_opt.isEmpty():
        table_comment = None
    else:
        # Double every backslash so it survives as a literal backslash.
        table_comment = comment_opt.get().replace("\\", "\\\\")

    result_df.write.save_as_table(
        table_name,
        comment=table_comment,
        mode=mode,
    )
218
+
219
+
220
def _spark_field_to_sql(field: jpype.JObject, is_column: bool) -> str:
    """
    Render a Spark field as ``<name> <type>`` for use in DDL.

    Column names follow the DDL auto-uppercase setting (via
    ``_spark_to_snowflake_single_id``), while struct field names are only quoted and
    otherwise left as is. This should let users use the same names in Spark and
    Snowflake in most cases.
    """
    raw_name = str(field.name())
    quoted_name = (
        _spark_to_snowflake_single_id(raw_name)
        if is_column
        else quote_name_without_upper_casing(raw_name)
    )
    # TODO: Support comments
    return f"{quoted_name} {_spark_datatype_to_sql(field.dataType())}"
231
+
232
+
233
+ def _spark_datatype_to_sql(data_type: jpype.JObject) -> str:
234
+ match data_type.typeName():
235
+ case "array":
236
+ element_type_str = _spark_datatype_to_sql(data_type.elementType())
237
+ return f"ARRAY({element_type_str})"
238
+ case "map":
239
+ key_type_str = _spark_datatype_to_sql(data_type.keyType())
240
+ value_type_str = _spark_datatype_to_sql(data_type.valueType())
241
+ return f"MAP({key_type_str}, {value_type_str})"
242
+ case "struct":
243
+ field_types_str = ", ".join(
244
+ _spark_field_to_sql(f, False) for f in data_type.fields()
245
+ )
246
+ return f"OBJECT({field_types_str})"
247
+ case _:
248
+ return data_type.sql()
249
+
250
+
251
def _normalize_identifiers(node):
    """
    Fix spark-quoted identifiers parsed with sqlglot.

    sqlglot detects quoted spark identifiers, which makes them quoted in the
    Snowflake SQL string. That is not consistent with Spark, where non-column
    identifiers are case insensitive, so identifiers are uppercased to match
    Snowflake's behaviour. Users can disable this by setting the
    `snowpark.connect.auto_uppercase_ddl` config to False.

    Every ``Identifier`` node is replaced by a quoted ``Identifier``; any other
    node is returned unchanged.
    """
    if not isinstance(node, Identifier):
        return node

    if auto_uppercase_ddl():
        identifier_text = node.this.upper()
    else:
        identifier_text = node.this
    return Identifier(this=identifier_text, quoted=True)
264
+
265
+
266
def _remove_file_format_property(node):
    """
    Drop sqlglot ``FileFormatProperty`` nodes and keep every other node.

    Returns ``None`` when ``node`` is a ``FileFormatProperty`` and returns ``node``
    unchanged otherwise.

    NOTE(review): presumably used as a callback for sqlglot's
    ``Expression.transform``, where returning ``None`` removes the node from the
    tree - confirm against the call sites.
    """
    if isinstance(node, FileFormatProperty):
        return None
    return node
278
+
279
+
280
def _remove_column_data_type(node):
    """
    Drop the data type attached to a column definition and keep every other node.

    Returns ``None`` when ``node`` is a sqlglot ``DataType`` whose parent is a
    ``ColumnDef``; returns ``node`` unchanged otherwise, including ``DataType``
    nodes that appear anywhere else.

    NOTE(review): presumably used as a callback for sqlglot's
    ``Expression.transform``, where returning ``None`` removes the node from the
    tree - confirm against the call sites.
    """
    if isinstance(node, DataType) and isinstance(node.parent, ColumnDef):
        return None
    return node
292
+
293
+
294
+ def map_sql_to_pandas_df(
295
+ sql_string: str,
296
+ named_args: MutableMapping[str, expressions_proto.Expression.Literal],
297
+ pos_args: MutableSequence[expressions_proto.Expression.Literal],
298
+ ) -> tuple[pandas.DataFrame, str] | tuple[None, None]:
299
+ """
300
+ Convert a sql string into a pandas DataFrame and its json schema.
301
+ returns a tuple of empty Pandas DataFrame and schema string in case of DDL statements.
302
+ returns a tuple of None for SELECT queries to enable lazy evaluation
303
+ """
304
+
305
+ snowpark_connect_sql_passthrough = get_sql_passthrough()
306
+
307
+ if not snowpark_connect_sql_passthrough:
308
+ logical_plan = sql_parser().parsePlan(sql_string)
309
+ parsed_pos_args = parse_pos_args(logical_plan, pos_args)
310
+ set_sql_args(named_args, parsed_pos_args)
311
+
312
+ session = get_or_create_snowpark_session()
313
+
314
+ rows: list | None = None
315
+
316
+ while (
317
+ class_name := str(logical_plan.getClass().getSimpleName())
318
+ ) == "UnresolvedHint":
319
+ logical_plan = logical_plan.child()
320
+
321
+ match class_name:
322
+ case "AddColumns":
323
+ # Handle ALTER TABLE ... ADD COLUMNS (col_name data_type) -> ADD COLUMN col_name data_type
324
+ table_name = get_relation_identifier_name(logical_plan.table(), True)
325
+
326
+ # Get column definitions from logical plan
327
+ columns_to_add = logical_plan.columnsToAdd()
328
+ # Build Snowflake SQL from logical plan attributes
329
+ for col in as_java_list(columns_to_add):
330
+ # Follow the same pattern as AlterColumn for column name extraction
331
+ col_name = ".".join(str(part) for part in as_java_list(col.name()))
332
+ col_type = _spark_datatype_to_sql(col.dataType())
333
+ snowflake_sql = (
334
+ f"ALTER TABLE {table_name} ADD COLUMN {col_name} {col_type}"
335
+ )
336
+ session.sql(snowflake_sql).collect()
337
+ case "AlterColumn":
338
+ # Handle ALTER TABLE ... CHANGE COLUMN (translate to ALTER TABLE ... ALTER COLUMN)
339
+ table_name = get_relation_identifier_name(logical_plan.table(), True)
340
+ column_obj = logical_plan.column()
341
+
342
+ # Extract actual column name
343
+ column_name = ".".join(
344
+ str(part) for part in as_java_list(column_obj.name())
345
+ )
346
+
347
+ # Build ALTER COLUMN command from logical plan attributes
348
+ alter_parts = []
349
+
350
+ # Check for comment change - Scala Some() vs None
351
+ comment_obj = logical_plan.comment()
352
+ if (
353
+ comment_obj is not None
354
+ and str(comment_obj.getClass().getSimpleName()) == "Some"
355
+ ):
356
+ comment = _escape_sql_comment(str(comment_obj.get()))
357
+ alter_parts.append(f"COMMENT '{comment}'")
358
+
359
+ # Check for dataType change - handle Scala Some/None
360
+ data_type_obj = logical_plan.dataType()
361
+ if (
362
+ data_type_obj is not None
363
+ and str(data_type_obj.getClass().getSimpleName()) == "Some"
364
+ ):
365
+ # Extract the actual data type from Scala Some()
366
+ actual_data_type = data_type_obj.get()
367
+ data_type = _spark_datatype_to_sql(actual_data_type)
368
+ alter_parts.append(f"TYPE {data_type}")
369
+
370
+ if alter_parts:
371
+ alter_clause = ", ".join(alter_parts)
372
+ snowflake_sql = f"ALTER TABLE {table_name} ALTER COLUMN {column_name} {alter_clause}"
373
+ session.sql(snowflake_sql).collect()
374
+ else:
375
+ raise ValueError(
376
+ f"No alter operations found in AlterColumn logical plan for table {table_name}, column {column_name}"
377
+ )
378
+ case "CreateNamespace":
379
+ name = get_relation_identifier_name(logical_plan.name(), True)
380
+ previous_name = session.connection.schema
381
+ if_not_exists = "IF NOT EXISTS " if logical_plan.ifNotExists() else ""
382
+ session.sql(f"CREATE SCHEMA {if_not_exists}{name}").collect()
383
+ if previous_name is not None:
384
+ session.sql(
385
+ f"USE SCHEMA {_spark_to_snowflake_single_id(previous_name)}"
386
+ ).collect()
387
+ else:
388
+ # TODO: Unset the schema
389
+ pass
390
+ case "CreateTable":
391
+ name = get_relation_identifier_name(logical_plan.name())
392
+ columns = ", ".join(
393
+ _spark_field_to_sql(f, True)
394
+ for f in logical_plan.tableSchema().fields()
395
+ )
396
+ if_not_exists = (
397
+ "IF NOT EXISTS " if logical_plan.ignoreIfExists() else ""
398
+ )
399
+ comment_opt = logical_plan.tableSpec().comment()
400
+ comment = (
401
+ f"COMMENT = '{_escape_sql_comment(str(comment_opt.get()))}'"
402
+ if comment_opt.isDefined()
403
+ else ""
404
+ )
405
+ # NOTE: We are intentionally ignoring any FORMAT=... parameters here.
406
+ session.sql(
407
+ f"CREATE TABLE {if_not_exists}{name} ({columns}) {comment}"
408
+ ).collect()
409
+ case "CreateTableAsSelect":
410
+ mode = "ignore" if logical_plan.ignoreIfExists() else "errorifexists"
411
+ _create_table_as_select(logical_plan, mode=mode)
412
+ case "CreateTableLikeCommand":
413
+ source = get_relation_identifier_name(logical_plan.sourceTable())
414
+ name = get_relation_identifier_name(logical_plan.targetTable())
415
+ if_not_exists = "IF NOT EXISTS " if logical_plan.ifNotExists() else ""
416
+ session.sql(
417
+ f"CREATE TABLE {if_not_exists}{name} LIKE {source}"
418
+ ).collect()
419
+ case "CreateTempViewUsing":
420
+ empty_select = (
421
+ " AS SELECT * WHERE 1 = 0"
422
+ if logical_plan.options().isEmpty()
423
+ and logical_plan.children().isEmpty()
424
+ else ""
425
+ )
426
+ parsed_sql = (
427
+ sqlglot.parse_one(sql_string, dialect="spark")
428
+ .transform(_normalize_identifiers)
429
+ .transform(_remove_column_data_type)
430
+ .transform(_remove_file_format_property)
431
+ )
432
+ snowflake_sql = parsed_sql.sql(dialect="snowflake")
433
+ session.sql(f"{snowflake_sql}{empty_select}").collect()
434
+ case "CreateView":
435
+ current_schema = session.connection.schema
436
+ if (
437
+ str(logical_plan.child().getClass().getSimpleName())
438
+ == "PlanWithUnresolvedIdentifier"
439
+ ):
440
+ object_name: str = str(
441
+ logical_plan.child().identifierExpr().value()
442
+ )
443
+ else:
444
+ object_name: str = as_java_list(logical_plan.child().nameParts())[0]
445
+ _accessing_temp_object.set(False)
446
+ df = execute_logical_plan(logical_plan.query())
447
+ if _accessing_temp_object.get():
448
+ raise AnalysisException(
449
+ f"[INVALID_TEMP_OBJ_REFERENCE] Cannot create the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` "
450
+ "of the type VIEW because it references to a temporary object of the type VIEW. Please "
451
+ f"make the temporary object persistent, or make the persistent object `{CURRENT_CATALOG_NAME}`.`{current_schema}`.`{object_name}` temporary."
452
+ )
453
+
454
+ name = get_relation_identifier_name(logical_plan.child())
455
+ comment = logical_plan.comment()
456
+
457
+ df = _rename_columns(df, logical_plan.userSpecifiedColumns())
458
+
459
+ # TODO: Support logical_plan.replace() == False
460
+ df.create_or_replace_view(
461
+ name,
462
+ comment=_escape_sql_comment(str(comment.get()))
463
+ if comment.isDefined()
464
+ else None,
465
+ )
466
+ case "CreateViewCommand":
467
+ df = execute_logical_plan(logical_plan.plan())
468
+ tmp_views = _get_current_temp_objects()
469
+ tmp_views.add(
470
+ (
471
+ CURRENT_CATALOG_NAME,
472
+ session.connection.schema,
473
+ str(logical_plan.name().identifier()),
474
+ )
475
+ )
476
+
477
+ name = str(logical_plan.name().identifier())
478
+ name = _spark_to_snowflake_single_id(name)
479
+ if isinstance(
480
+ logical_plan.viewType(),
481
+ jpype.JClass(
482
+ "org.apache.spark.sql.catalyst.analysis.GlobalTempView$"
483
+ ),
484
+ ):
485
+ name = f"{global_config.spark_sql_globalTempDatabase}.{name}"
486
+ comment = logical_plan.comment()
487
+ maybe_comment = (
488
+ _escape_sql_comment(str(comment.get()))
489
+ if comment.isDefined()
490
+ else None
491
+ )
492
+
493
+ df = _rename_columns(df, logical_plan.userSpecifiedColumns())
494
+
495
+ if logical_plan.replace():
496
+ df.create_or_replace_temp_view(
497
+ name,
498
+ comment=maybe_comment,
499
+ )
500
+ else:
501
+ df.create_temp_view(
502
+ name,
503
+ comment=maybe_comment,
504
+ )
505
+ case "DescribeColumn":
506
+ name = get_relation_identifier_name(logical_plan.column())
507
+ rows = session.sql(f"DESCRIBE TABLE {name}").collect()
508
+ case "DescribeNamespace":
509
+ name = get_relation_identifier_name(logical_plan.namespace(), True)
510
+ rows = session.sql(f"DESCRIBE SCHEMA {name}").collect()
511
+ if not rows:
512
+ rows = None
513
+ case "DescribeRelation":
514
+ name = get_relation_identifier_name(logical_plan.relation(), True)
515
+ rows = session.sql(f"DESCRIBE TABLE {name}").collect()
516
+ if not rows:
517
+ rows = None
518
+ case "DescribeQueryCommand":
519
+ # Handle DESCRIBE QUERY <sql> commands
520
+ # Since Snowflake doesn't support DESCRIBE QUERY syntax, we use DataFrame schema analysis
521
+ # This gets the schema without executing the query (similar to Spark's DESCRIBE QUERY)
522
+ # Get the inner query plan and convert it to SQL
523
+ inner_query_plan = logical_plan.plan()
524
+ df = execute_logical_plan(inner_query_plan)
525
+ schema = df.schema
526
+
527
+ # Get original Spark column names using the column map from the original DataFrame
528
+ spark_columns = df._column_map.get_spark_columns()
529
+ data = []
530
+ for i, field in enumerate(schema.fields):
531
+ # Use original Spark column name from column map
532
+ col_name = spark_columns[i]
533
+
534
+ # Convert Snowpark data type to PySpark data type and get simpleString
535
+ pyspark_type = map_snowpark_to_pyspark_types(field.datatype)
536
+ data_type_str = pyspark_type.simpleString()
537
+
538
+ data.append(
539
+ {
540
+ "col_name": col_name,
541
+ "data_type": data_type_str,
542
+ "comment": None, # Snowflake schema doesn't include comments
543
+ }
544
+ )
545
+ return pandas.DataFrame(data), ""
546
+ case "DropFunctionCommand":
547
+ func_name = logical_plan.identifier().funcName().lower()
548
+ input_types, snowpark_name = [], ""
549
+ if func_name in session._udfs:
550
+ input_types, snowpark_name = (
551
+ session._udfs[func_name].input_types,
552
+ session._udfs[func_name].name,
553
+ )
554
+ del session._udfs[func_name]
555
+ elif func_name in session._udtfs:
556
+ input_types, snowpark_name = (
557
+ session._udtfs[func_name][0].input_types,
558
+ session._udtfs[func_name][0].name,
559
+ )
560
+ del session._udtfs[func_name]
561
+ else:
562
+ if not logical_plan.ifExists():
563
+ raise ValueError(
564
+ f"Function {func_name} not found among registered UDFs or UDTFs."
565
+ )
566
+ if snowpark_name != "":
567
+ argument_string = f"({', '.join(convert_sp_to_sf_type(arg) for arg in input_types)})"
568
+ session.sql(
569
+ f"DROP FUNCTION {'IF EXISTS' if logical_plan.ifExists() else ''} {snowpark_name}{argument_string}"
570
+ ).collect()
571
+ case "DropNamespace":
572
+ name = get_relation_identifier_name(logical_plan.namespace(), True)
573
+ if_exists = "IF EXISTS " if logical_plan.ifExists() else ""
574
+ session.sql(f"DROP SCHEMA {if_exists}{name}").collect()
575
+ case "DropTable":
576
+ name = get_relation_identifier_name(logical_plan.child())
577
+ if_exists = "IF EXISTS " if logical_plan.ifExists() else ""
578
+ session.sql(f"DROP TABLE {if_exists}{name}").collect()
579
+ case "DropView":
580
+ name = get_relation_identifier_name(logical_plan.child())
581
+ if_exists = "IF EXISTS " if logical_plan.ifExists() else ""
582
+ session.sql(f"DROP VIEW {if_exists}{name}").collect()
583
+ case "ExplainCommand":
584
+ inner_plan = logical_plan.logicalPlan()
585
+ logical_plan_name = inner_plan.nodeName()
586
+
587
+ # Handle EXPLAIN DESCRIBE QUERY commands
588
+ if logical_plan_name == "DescribeQueryCommand":
589
+ # For EXPLAIN DESCRIBE QUERY, we should return an explanation of the describe operation itself
590
+ # NOT execute the inner query to get its SQL
591
+ query_plan = inner_plan.plan()
592
+ plan_description = (
593
+ f"Describe query plan for: {query_plan.nodeName()}"
594
+ )
595
+ rows = [snowpark.Row(plan=plan_description)]
596
+ elif logical_plan_name == "DescribeRelation":
597
+ # For EXPLAIN DESCRIBE RELATION, we should return an explanation of the describe operation itself
598
+ # NOT execute the inner query to get its SQL
599
+ relation = inner_plan.relation()
600
+ plan_description = (
601
+ f"Describe relation plan for: {relation.commandName()}"
602
+ )
603
+ rows = [snowpark.Row(plan=plan_description)]
604
+ elif logical_plan_name == "DescribeColumn":
605
+ # For EXPLAIN DESCRIBE COLUMN, we should return an explanation of the describe operation itself
606
+ # NOT execute the inner query to get its SQL
607
+ column = inner_plan.column()
608
+ plan_description = f"Describe column plan for: [{column.name()}]"
609
+ rows = [snowpark.Row(plan=plan_description)]
610
+ elif logical_plan_name in (
611
+ "Project",
612
+ "Aggregate",
613
+ "Sort",
614
+ "UnresolvedWith",
615
+ "UnresolvedHaving",
616
+ "Distinct",
617
+ ):
618
+ expr = execute_logical_plan(logical_plan.logicalPlan()).queries[
619
+ "queries"
620
+ ][0]
621
+ final_sql = f"EXPLAIN USING TEXT {expr}"
622
+ rows = session.sql(final_sql).collect()
623
+ elif (
624
+ logical_plan_name == "InsertIntoStatement"
625
+ or logical_plan_name == "CreateView"
626
+ ):
627
+ expr = execute_logical_plan(
628
+ logical_plan.logicalPlan().query()
629
+ ).queries["queries"][0]
630
+ final_sql = f"EXPLAIN USING TEXT {expr}"
631
+ rows = session.sql(final_sql).collect()
632
+ else:
633
+ # TODO: Support other logical plans
634
+ raise SnowparkConnectNotImplementedError(
635
+ f"{logical_plan_name} is not supported yet with EXPLAIN."
636
+ )
637
+ case "InsertIntoStatement":
638
+ df = execute_logical_plan(logical_plan.query())
639
+ queries = df.queries["queries"]
640
+ if len(queries) != 1:
641
+ raise SnowparkConnectNotImplementedError(
642
+ f"Unexpected number of queries: {len(queries)}"
643
+ )
644
+
645
+ name = get_relation_identifier_name(logical_plan.table(), True)
646
+
647
+ user_columns = [
648
+ _spark_to_snowflake_single_id(str(col))
649
+ for col in as_java_list(logical_plan.userSpecifiedCols())
650
+ ]
651
+ overwrite_str = "OVERWRITE" if logical_plan.overwrite() else ""
652
+ cols_str = "(" + ", ".join(user_columns) + ")" if user_columns else ""
653
+
654
+ try:
655
+ target_table = session.table(name)
656
+ target_schema = target_table.schema
657
+
658
+ # Modify df with NaN → NULL conversion for DECIMAL columns
659
+ modified_columns = []
660
+ for source_field, target_field in zip(
661
+ df.schema.fields, target_schema.fields
662
+ ):
663
+ col_name = source_field.name
664
+ if isinstance(
665
+ target_field.datatype, snowpark.types.DecimalType
666
+ ) and isinstance(
667
+ source_field.datatype,
668
+ (snowpark.types.FloatType, snowpark.types.DoubleType),
669
+ ):
670
+ # Add CASE WHEN to convert NaN to NULL for DECIMAL targets
671
+ # Only apply this to floating-point source columns
672
+ modified_col = (
673
+ snowpark_fn.when(
674
+ snowpark_fn.equal_nan(snowpark_fn.col(col_name)),
675
+ snowpark_fn.lit(None),
676
+ )
677
+ .otherwise(snowpark_fn.col(col_name))
678
+ .alias(col_name)
679
+ )
680
+ modified_columns.append(modified_col)
681
+ else:
682
+ modified_columns.append(snowpark_fn.col(col_name))
683
+
684
+ df = df.select(modified_columns)
685
+ except Exception:
686
+ pass
687
+ queries = df.queries["queries"]
688
+ final_query = queries[0]
689
+ session.sql(
690
+ f"INSERT {overwrite_str} INTO {name} {cols_str} {final_query}",
691
+ ).collect()
692
+ case "MergeIntoTable":
693
+
694
+ def _get_condition_from_action(action, column_mapping, typer):
695
+ condition = None
696
+ if action.condition().isDefined():
697
+ (_, condition_typed_col,) = map_single_column_expression(
698
+ map_logical_plan_expression(
699
+ matched_action.condition().get()
700
+ ),
701
+ column_mapping,
702
+ typer,
703
+ )
704
+ condition = condition_typed_col.col
705
+ return condition
706
+
707
+ def _get_assignments_from_action(
708
+ action,
709
+ column_mapping_source,
710
+ column_mapping_target,
711
+ typer_source,
712
+ typer_target,
713
+ ):
714
+ assignments = dict()
715
+ if (
716
+ action.getClass().getSimpleName() == "InsertAction"
717
+ or action.getClass().getSimpleName() == "UpdateAction"
718
+ ):
719
+ incoming_assignments = as_java_list(action.assignments())
720
+ for assignment in incoming_assignments:
721
+ (key_name, _) = map_single_column_expression(
722
+ map_logical_plan_expression(assignment.key()),
723
+ column_mapping=column_mapping_target,
724
+ typer=typer_source,
725
+ )
726
+
727
+ (_, val_typ_col) = map_single_column_expression(
728
+ map_logical_plan_expression(assignment.value()),
729
+ column_mapping=column_mapping_source,
730
+ typer=typer_target,
731
+ )
732
+
733
+ assignments[key_name] = val_typ_col.col
734
+ elif (
735
+ action.getClass().getSimpleName() == "InsertStarAction"
736
+ or action.getClass().getSimpleName() == "UpdateStarAction"
737
+ ):
738
+ if len(column_mapping_source.columns) != len(
739
+ column_mapping_target.columns
740
+ ):
741
+ raise ValueError(
742
+ "source and target must have the same number of columns for InsertStarAction or UpdateStarAction"
743
+ )
744
+ for i, col in enumerate(column_mapping_target.columns):
745
+ if assignments.get(col.snowpark_name) is not None:
746
+ raise SnowparkConnectNotImplementedError(
747
+ "UpdateStarAction or InsertStarAction is not supported with duplicate columns."
748
+ )
749
+ assignments[col.snowpark_name] = snowpark_fn.col(
750
+ column_mapping_source.columns[i].snowpark_name
751
+ )
752
+ return assignments
753
+
754
+ source_df = map_relation(
755
+ map_logical_plan_relation(logical_plan.sourceTable())
756
+ )
757
+
758
+ plan_id = gen_sql_plan_id()
759
+
760
+ target_df = map_relation(
761
+ map_logical_plan_relation(logical_plan.targetTable(), plan_id)
762
+ )
763
+ for col in target_df._column_map.columns:
764
+ target_df = target_df.with_column_renamed(
765
+ col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
766
+ )
767
+ target_df = column_name_handler.with_column_map(
768
+ target_df, target_df.columns, target_df.columns
769
+ )
770
+
771
+ set_plan_id_map(plan_id, target_df)
772
+
773
+ joined_df_before_condition: snowpark.DataFrame = source_df.join(
774
+ target_df
775
+ )
776
+
777
+ column_mapping_for_conditions = column_name_handler.JoinColumnNameMap(
778
+ source_df,
779
+ target_df,
780
+ )
781
+ typer_for_expressions = ExpressionTyper(joined_df_before_condition)
782
+
783
+ (_, merge_condition_typed_col,) = map_single_column_expression(
784
+ map_logical_plan_expression(logical_plan.mergeCondition()),
785
+ column_mapping=column_mapping_for_conditions,
786
+ typer=typer_for_expressions,
787
+ )
788
+
789
+ clauses = []
790
+
791
+ for matched_action in as_java_list(logical_plan.matchedActions()):
792
+ condition = _get_condition_from_action(
793
+ matched_action,
794
+ column_mapping_for_conditions,
795
+ typer_for_expressions,
796
+ )
797
+ if matched_action.getClass().getSimpleName() == "DeleteAction":
798
+ clauses.append(when_matched(condition).delete())
799
+ elif (
800
+ matched_action.getClass().getSimpleName() == "UpdateAction"
801
+ or matched_action.getClass().getSimpleName()
802
+ == "UpdateStarAction"
803
+ ):
804
+ assignments = _get_assignments_from_action(
805
+ matched_action,
806
+ source_df._column_map,
807
+ target_df._column_map,
808
+ ExpressionTyper(source_df),
809
+ ExpressionTyper(target_df),
810
+ )
811
+ clauses.append(when_matched(condition).update(assignments))
812
+
813
+ for not_matched_action in as_java_list(
814
+ logical_plan.notMatchedActions()
815
+ ):
816
+ condition = _get_condition_from_action(
817
+ not_matched_action,
818
+ column_mapping_for_conditions,
819
+ typer_for_expressions,
820
+ )
821
+ if (
822
+ not_matched_action.getClass().getSimpleName() == "InsertAction"
823
+ or not_matched_action.getClass().getSimpleName()
824
+ == "InsertStarAction"
825
+ ):
826
+ assignments = _get_assignments_from_action(
827
+ not_matched_action,
828
+ source_df._column_map,
829
+ target_df._column_map,
830
+ ExpressionTyper(source_df),
831
+ ExpressionTyper(target_df),
832
+ )
833
+ clauses.append(when_not_matched(condition).insert(assignments))
834
+
835
+ if not as_java_list(logical_plan.notMatchedBySourceActions()).isEmpty():
836
+ raise SnowparkConnectNotImplementedError(
837
+ "Snowflake does not support 'not matched by source' actions in MERGE statements."
838
+ )
839
+
840
+ if (
841
+ logical_plan.targetTable().getClass().getSimpleName()
842
+ == "UnresolvedRelation"
843
+ ):
844
+ target_table_name = _spark_to_snowflake(
845
+ logical_plan.targetTable().multipartIdentifier()
846
+ )
847
+ else:
848
+ target_table_name = _spark_to_snowflake(
849
+ logical_plan.targetTable().child().multipartIdentifier()
850
+ )
851
+ session.table(target_table_name).merge(
852
+ source_df, merge_condition_typed_col.col, clauses
853
+ )
854
+ case "DeleteFromTable":
855
+ df = map_relation(map_logical_plan_relation(logical_plan.table()))
856
+ for col in df._column_map.columns:
857
+ df = df.with_column_renamed(
858
+ col.snowpark_name, _spark_to_snowflake_single_id(col.spark_name)
859
+ )
860
+ df = column_name_handler.with_column_map(df, df.columns, df.columns)
861
+
862
+ name = get_relation_identifier_name(logical_plan.table(), True)
863
+ (
864
+ condition_column_name,
865
+ condition_typed_col,
866
+ ) = map_single_column_expression(
867
+ map_logical_plan_expression(logical_plan.condition()),
868
+ df._column_map,
869
+ ExpressionTyper(df),
870
+ )
871
+ session.table(name).delete(condition_typed_col.col)
872
+ case "RenameColumn":
873
+ table_name = get_relation_identifier_name(logical_plan.table(), True)
874
+ column_obj = logical_plan.column()
875
+ old_column_name = ".".join(
876
+ str(part) for part in as_java_list(column_obj.name())
877
+ )
878
+ new_column_name = str(logical_plan.newName())
879
+
880
+ # Pass through to Snowflake
881
+ snowflake_sql = f"ALTER TABLE {table_name} RENAME COLUMN {old_column_name} TO {new_column_name}"
882
+ session.sql(snowflake_sql).collect()
883
+ case "RenameTable":
884
+ name = get_relation_identifier_name(logical_plan.child(), True)
885
+ new_name = _spark_to_snowflake(logical_plan.newName())
886
+
887
+ try:
888
+ session.sql(f"ALTER TABLE {name} RENAME TO {new_name}").collect()
889
+ except Exception as e:
890
+ # This is a trick to rename iceberg tables without having to first sacrifice a query to determine
891
+ # whether the source table is an iceberg table.
892
+ # TODO(SNOW-2118744): such keyword is required for other ALTER TABLE commands against Iceberg tables
893
+ # too.
894
+ if str(e).find("is an Iceberg table") >= 0:
895
+ session.sql(
896
+ f"ALTER ICEBERG TABLE {name} RENAME TO {new_name}"
897
+ ).collect()
898
+ else:
899
+ raise e
900
+ case "ReplaceTableAsSelect":
901
+ _create_table_as_select(logical_plan, mode="overwrite")
902
+ case "ResetCommand":
903
+ key = logical_plan.config().get()
904
+ unset_config_param(get_session_id(), key, session)
905
+ case "SetCatalogAndNamespace":
906
+ # TODO: add catalog setting here
907
+ name = get_relation_identifier_name(logical_plan.child(), True)
908
+ session.sql(f"USE SCHEMA {name}").collect()
909
+ case "SetCommand":
910
+ kv_result_tuple = logical_plan.kv().get()
911
+ key = kv_result_tuple._1()
912
+ val = kv_result_tuple._2().get()
913
+ set_config_param(get_session_id(), key, val, session)
914
+ case "SetNamespaceCommand":
915
+ name = _spark_to_snowflake(logical_plan.namespace())
916
+ session.sql(f"USE SCHEMA {name}").collect()
917
+ case "SetNamespaceLocation" | "SetNamespaceProperties":
918
+ raise SnowparkConnectNotImplementedError(
919
+ "Altering databases is not currently supported."
920
+ )
921
+ case "ShowCreateTable":
922
+ # Handle SHOW CREATE TABLE command
923
+ # Spark: SHOW CREATE TABLE table_name
924
+ # Snowflake: SELECT get_ddl('table', 'table_name')
925
+ table_relation = logical_plan.child()
926
+ table_name = _spark_to_snowflake(table_relation.multipartIdentifier())
927
+
928
+ # Convert to Snowflake get_ddl function
929
+ snowflake_sql = f"SELECT get_ddl('table', '{table_name}') AS ddl"
930
+ rows = session.sql(snowflake_sql).collect()
931
+
932
+ case "ShowCurrentNamespaceCommand":
933
+ name = session.get_current_schema()
934
+ unquoted_name = unquote_if_quoted(name)
935
+ sql = f"SHOW SCHEMAS LIKE '{unquoted_name}'"
936
+ rows = session.sql(sql).collect()
937
+ if not rows:
938
+ rows = None
939
+ case "ShowNamespaces":
940
+ name = get_relation_identifier_name(logical_plan.namespace(), True)
941
+ if name:
942
+ raise SnowparkConnectNotImplementedError(
943
+ "'IN' clause is not supported while listing databases"
944
+ )
945
+ if logical_plan.pattern().isDefined():
946
+ # Snowflake SQL requires a "%" pattern.
947
+ # Snowpark catalog requires a regex and does client-side filtering.
948
+ # Spark, however, uses a regex-like pattern that treats '*' and '|' differently.
949
+ raise SnowparkConnectNotImplementedError(
950
+ "'LIKE' clause is not supported while listing databases"
951
+ )
952
+ rows = session.sql("SHOW SCHEMAS").collect()
953
+ if not rows:
954
+ rows = None
955
+ case "ShowTables" | "ShowTableExtended":
956
+ name = get_relation_identifier_name(logical_plan.namespace(), True)
957
+
958
+ # Get the pattern for filtering
959
+ pattern = None
960
+ if class_name == "ShowTables" and logical_plan.pattern().isDefined():
961
+ pattern = logical_plan.pattern().get()
962
+ elif (
963
+ class_name == "ShowTableExtended"
964
+ and len(logical_plan.pattern()) != 0
965
+ ):
966
+ pattern = logical_plan.pattern()
967
+
968
+ # Execute SHOW TABLES command
969
+ if name:
970
+ rows = session.sql(f"SHOW TABLES IN {name}").collect()
971
+ else:
972
+ rows = session.sql("SHOW TABLES").collect()
973
+
974
+ # Apply pattern filtering if pattern is provided
975
+ # This is workaround to filter using Python regex.
976
+ if pattern and rows:
977
+ rows = _filter_tables_by_pattern(rows, pattern)
978
+ case "ShowViews":
979
+ name = get_relation_identifier_name(logical_plan.namespace(), True)
980
+
981
+ # Get the pattern for filtering
982
+ pattern = (
983
+ logical_plan.pattern().get()
984
+ if logical_plan.pattern().isDefined()
985
+ else None
986
+ )
987
+
988
+ # Execute SHOW VIEWS command
989
+ if name:
990
+ rows = session.sql(f"SHOW VIEWS IN {name}").collect()
991
+ else:
992
+ rows = session.sql("SHOW VIEWS").collect()
993
+
994
+ # Apply pattern filtering if pattern is provided
995
+ if pattern and rows:
996
+ rows = _filter_tables_by_pattern(rows, pattern)
997
+ case "ShowColumns":
998
+ # Handle Spark SQL: SHOW COLUMNS IN table_name FROM database_name
999
+ # Convert to Snowflake SQL: SHOW COLUMNS IN TABLE database_name.table_name
1000
+
1001
+ # Extract table name from ShowColumns logical plan
1002
+ # The child() is an UnresolvedTable object, use the existing helper function
1003
+ table_relation = logical_plan.child()
1004
+ db_and_table_name = as_java_list(table_relation.multipartIdentifier())
1005
+ multi_part_len = len(db_and_table_name)
1006
+ table_name = _spark_to_snowflake(table_relation.multipartIdentifier())
1007
+
1008
+ db_name = None
1009
+ # Get database name if specified in namespace
1010
+ if logical_plan.namespace().isDefined():
1011
+ db_namespace = logical_plan.namespace().get()
1012
+ db_name = _spark_to_snowflake(db_namespace)
1013
+
1014
+ # Build the Snowflake SHOW COLUMNS command
1015
+ if db_name and multi_part_len == 1:
1016
+ # Full qualified table name: db.table
1017
+ full_table_name = f"{db_name}.{table_name}"
1018
+ snowflake_cmd = f"SHOW COLUMNS IN TABLE {full_table_name}"
1019
+ else:
1020
+ if db_name and multi_part_len == 2:
1021
+ # Check db_name is same as in the full table name
1022
+ if (
1023
+ _spark_to_snowflake_single_id(
1024
+ str(db_and_table_name[0])
1025
+ ).casefold()
1026
+ != db_name.casefold()
1027
+ ):
1028
+ raise AnalysisException(
1029
+ f"database name is not matching:{db_name} and {db_and_table_name[0]}"
1030
+ )
1031
+
1032
+ # Just table name
1033
+ snowflake_cmd = f"SHOW COLUMNS IN TABLE {table_name}"
1034
+
1035
+ rows = session.sql(snowflake_cmd).collect()
1036
+ case "TruncateTable":
1037
+ name = get_relation_identifier_name(logical_plan.table(), True)
1038
+ session.sql(f"TRUNCATE TABLE {name}").collect()
1039
+
1040
+ case command if (
1041
+ command.startswith("Alter")
1042
+ or command.startswith("Create")
1043
+ or command.startswith("Drop")
1044
+ or command.startswith("Rename")
1045
+ or command.startswith("Replace")
1046
+ or command.startswith("Set")
1047
+ or command.startswith("Truncate")
1048
+ or command.startswith("AddColumns")
1049
+ ):
1050
+ parsed_sql = sqlglot.parse_one(sql_string, dialect="spark").transform(
1051
+ _normalize_identifiers
1052
+ )
1053
+ snowflake_sql = parsed_sql.sql(dialect="snowflake")
1054
+ session.sql(snowflake_sql).collect()
1055
+ case command if command.startswith("Describe") or command.startswith(
1056
+ "Show"
1057
+ ):
1058
+ parsed_sql = sqlglot.parse_one(sql_string, dialect="spark").transform(
1059
+ _normalize_identifiers
1060
+ )
1061
+ snowflake_sql = parsed_sql.sql(dialect="snowflake")
1062
+ if command.startswith("Show"):
1063
+ if snowflake_sql.startswith("SHOW TBLPROPERTIES"):
1064
+ # Snowflake doesn't support TBLPROPERTIES, EXTENDED.
1065
+ return pandas.DataFrame({"": [""]}), ""
1066
+
1067
+ rows = session.sql(snowflake_sql).collect()
1068
+ case _:
1069
+ execute_logical_plan(logical_plan)
1070
+ return None, None
1071
+ else:
1072
+ # spark.sql("select or cte+select") queries should be executed lazily.
1073
+ # This returns an empty dataframe and empty schema.
1074
+ # if is_sql_select_statement(_trim_sql_string(sql_string)):
1075
+ if _is_sql_select_statement_helper(sql_string):
1076
+ return None, None
1077
+ session = snowpark.Session.get_active_session()
1078
+ sql_df = session.sql(sql_string)
1079
+ columns = sql_df.columns
1080
+ column_name_handler.with_column_map(sql_df, columns, columns)
1081
+ rows = sql_df.collect()
1082
+
1083
+ if rows:
1084
+ return pandas.DataFrame(rows), ""
1085
+ return pandas.DataFrame({"": [""]}), ""
1086
+
1087
+
1088
def get_sql_passthrough() -> bool:
    """Return the boolean session config value ``snowpark.connect.sql.passthrough``."""
    passthrough_enabled = get_boolean_session_config_param(
        "snowpark.connect.sql.passthrough"
    )
    return passthrough_enabled
1090
+
1091
+
1092
def map_sql(rel: relation_proto.Relation) -> snowpark.DataFrame:
    """
    Turn the SQL query carried by ``rel`` into a DataFrame.

    When passthrough mode is enabled, the query text is handed directly to
    ``session.sql()`` and the Spark parser is not invoked. This mitigates
    issues not covered by the Spark logical plan to protobuf conversion.

    Otherwise the query is parsed into a Spark logical plan, the SQL
    arguments from the relation are registered, and the plan is executed.
    """
    if get_sql_passthrough():
        # Passthrough: run the raw text and map each column name to itself.
        active_session = snowpark.Session.get_active_session()
        passthrough_df = active_session.sql(rel.sql.query)
        names = passthrough_df.columns
        return column_name_handler.with_column_map(passthrough_df, names, names)

    plan = sql_parser().parseQuery(rel.sql.query)

    positional_args = parse_pos_args(plan, rel.sql.pos_args)
    set_sql_args(rel.sql.args, positional_args)

    return execute_logical_plan(plan)
1116
+
1117
+
1118
+ def map_logical_plan_relation(
1119
+ rel, plan_id: int | None = None
1120
+ ) -> relation_proto.Relation:
1121
+ if plan_id is None:
1122
+ plan_id = gen_sql_plan_id()
1123
+ session = get_or_create_snowpark_session()
1124
+
1125
+ class_name = str(rel.getClass().getSimpleName())
1126
+ match class_name:
1127
+ case "Aggregate":
1128
+ with push_sql_scope():
1129
+ input = map_logical_plan_relation(rel.child())
1130
+
1131
+ # For LCA support in GROUP BY, we need to extract aliases from the aggregate expressions
1132
+ # In Spark SQL, when you write "SELECT a as k, COUNT(b) FROM table GROUP BY k",
1133
+ # the aliases are defined in the aggregateExpressions, not in a separate Project node
1134
+ alias_map = {}
1135
+
1136
+ # Extract aliases from the aggregate expressions (SELECT clause)
1137
+ alias_map = {}
1138
+ for agg_expr in as_java_list(rel.aggregateExpressions()):
1139
+ if str(agg_expr.getClass().getSimpleName()) == "Alias":
1140
+ alias_map[str(agg_expr.name())] = agg_expr.child()
1141
+
1142
def substitute_lca_in_grouping_expr(expr):
    """Resolve a GROUP BY item that points back into the SELECT list.

    Two forms are rewritten; everything else is returned untouched:

    * an integer/long ``Literal`` used as a 1-based ordinal (``GROUP BY 1``)
      becomes the aggregate expression at that position, unwrapped if it is
      an ``Alias``;
    * a single-part ``UnresolvedAttribute`` whose name is a key of
      ``alias_map`` becomes the aliased expression.
    """
    kind = str(expr.getClass().getSimpleName())

    if kind == "Literal":
        # Unquoted ordinals arrive as integer literals; a quoted name such as
        # GROUP BY "1" arrives as an UnresolvedAttribute instead.
        is_integral = hasattr(expr, "dataType") and str(
            expr.dataType().typeName()
        ) in ("integer", "long")
        if is_integral:
            position = expr.value()
            select_list = as_java_list(rel.aggregateExpressions())
            if isinstance(position, int) and 1 <= position <= len(select_list):
                selected = select_list[position - 1]  # 1-based -> 0-based
                is_alias = str(selected.getClass().getSimpleName()) == "Alias"
                return selected.child() if is_alias else selected
        # An out-of-range ordinal or non-integer literal is returned
        # unchanged by this function.
        return expr

    if kind == "UnresolvedAttribute":
        parts = as_java_list(expr.nameParts())
        if len(parts) == 1:
            return alias_map.get(str(parts[0]), expr)

    return expr
1187
+
1188
+ group_type = snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY
1189
+
1190
+ grouping_sets: list[snowflake_proto.Aggregate.GroupingSets] = []
1191
+
1192
+ group_expression_list = as_java_list(rel.groupingExpressions())
1193
+ for exp in group_expression_list:
1194
+ match str(exp.getClass().getSimpleName()):
1195
+ case "Rollup":
1196
+ group_type = snowflake_proto.Aggregate.GROUP_TYPE_ROLLUP
1197
+ case "Cube":
1198
+ group_type = snowflake_proto.Aggregate.GROUP_TYPE_CUBE
1199
+ case "GroupingSets":
1200
+ if not exp.userGivenGroupByExprs().isEmpty():
1201
+ raise SnowparkConnectNotImplementedError(
1202
+ "User-defined group by expressions are not supported"
1203
+ )
1204
+ group_type = (
1205
+ snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS
1206
+ )
1207
+ grouping_sets = [
1208
+ snowflake_proto.Aggregate.GroupingSets(
1209
+ grouping_set=[
1210
+ map_logical_plan_expression(e)
1211
+ for e in as_java_list(grouping_set)
1212
+ ]
1213
+ )
1214
+ for grouping_set in as_java_list(exp.groupingSets())
1215
+ ]
1216
+
1217
+ if group_type != snowflake_proto.Aggregate.GROUP_TYPE_GROUPBY:
1218
+ if len(group_expression_list) != 1:
1219
+ raise SnowparkConnectNotImplementedError(
1220
+ "Multiple grouping expressions are not supported"
1221
+ )
1222
+ if group_type == snowflake_proto.Aggregate.GROUP_TYPE_GROUPING_SETS:
1223
+ group_expression_list = [] # TODO: exp.userGivenGroupByExprs()?
1224
+ else:
1225
+ group_expression_list = as_java_list(
1226
+ group_expression_list[0].children()
1227
+ )
1228
+
1229
+ grouping_expressions = [
1230
+ map_logical_plan_expression(substitute_lca_in_grouping_expr(e))
1231
+ for e in group_expression_list
1232
+ ]
1233
+
1234
+ aggregate_expressions = [
1235
+ map_logical_plan_expression(e)
1236
+ for e in as_java_list(rel.aggregateExpressions())
1237
+ ]
1238
+
1239
+ any_proto = Any()
1240
+ any_proto.Pack(
1241
+ snowflake_proto.Extension(
1242
+ aggregate=snowflake_proto.Aggregate(
1243
+ input=input,
1244
+ group_type=group_type,
1245
+ grouping_expressions=grouping_expressions,
1246
+ aggregate_expressions=aggregate_expressions,
1247
+ grouping_sets=grouping_sets,
1248
+ )
1249
+ )
1250
+ )
1251
+ proto = relation_proto.Relation(extension=any_proto)
1252
+ case "Distinct":
1253
+ proto = relation_proto.Relation(
1254
+ deduplicate=relation_proto.Deduplicate(
1255
+ input=map_logical_plan_relation(rel.child())
1256
+ )
1257
+ )
1258
+ case "Except":
1259
+ proto = relation_proto.Relation(
1260
+ set_op=relation_proto.SetOperation(
1261
+ left_input=map_logical_plan_relation(rel.left()),
1262
+ right_input=map_logical_plan_relation(rel.right()),
1263
+ set_op_type=relation_proto.SetOperation.SET_OP_TYPE_EXCEPT,
1264
+ is_all=rel.isAll(),
1265
+ )
1266
+ )
1267
+ case "Filter":
1268
+ proto = relation_proto.Relation(
1269
+ filter=relation_proto.Filter(
1270
+ input=map_logical_plan_relation(rel.child()),
1271
+ condition=map_logical_plan_expression(rel.condition()),
1272
+ )
1273
+ )
1274
+ case "GlobalLimit":
1275
+ # TODO: What's a global limit and what's a local limit?
1276
+ proto = map_logical_plan_relation(rel.child())
1277
+ case "Intersect":
1278
+ proto = relation_proto.Relation(
1279
+ set_op=relation_proto.SetOperation(
1280
+ left_input=map_logical_plan_relation(rel.left()),
1281
+ right_input=map_logical_plan_relation(rel.right()),
1282
+ set_op_type=relation_proto.SetOperation.SET_OP_TYPE_INTERSECT,
1283
+ is_all=rel.isAll(),
1284
+ )
1285
+ )
1286
+ case "Join":
1287
+ join_type_sql = str(rel.joinType().sql())
1288
+ join_type_name = f"JOIN_TYPE_{join_type_sql.replace(' ', '_')}"
1289
+ condition = rel.condition()
1290
+
1291
+ left = map_logical_plan_relation(rel.left())
1292
+ right = map_logical_plan_relation(rel.right())
1293
+ join_condition = (
1294
+ map_logical_plan_expression(condition.get())
1295
+ if condition.isDefined()
1296
+ else None
1297
+ )
1298
+
1299
+ if "_NATURAL" in join_type_name:
1300
+ join_type_name = join_type_name.replace("_NATURAL", "")
1301
+ natural_join_base_offset = NATURAL_JOIN_TYPE_BASE
1302
+ else:
1303
+ natural_join_base_offset = 0
1304
+
1305
+ if "_USING" in join_type_name:
1306
+ using_columns = as_java_list(rel.joinType().usingColumns())
1307
+ join_type_name = join_type_name.replace("_USING", "")
1308
+ else:
1309
+ using_columns = []
1310
+
1311
+ proto = relation_proto.Relation(
1312
+ join=relation_proto.Join(
1313
+ left=left,
1314
+ right=right,
1315
+ join_condition=join_condition,
1316
+ join_type=getattr(relation_proto.Join.JoinType, join_type_name)
1317
+ + natural_join_base_offset,
1318
+ using_columns=using_columns,
1319
+ )
1320
+ )
1321
+ case "LocalLimit":
1322
+
1323
+ if rel.limitExpr().getClass().getSimpleName() == "Literal":
1324
+ limit_val = rel.limitExpr().value()
1325
+ else:
1326
+ expr_proto = map_logical_plan_expression(rel.limitExpr())
1327
+ session = snowpark.Session.get_active_session()
1328
+ m = ColumnNameMap([], [], None)
1329
+ expr = map_single_column_expression(
1330
+ expr_proto, m, ExpressionTyper.dummy_typer(session)
1331
+ )
1332
+ limit_val = session.range(1).select(expr[1].col).collect()[0][0]
1333
+
1334
+ proto = relation_proto.Relation(
1335
+ limit=relation_proto.Limit(
1336
+ input=map_logical_plan_relation(rel.child()),
1337
+ limit=limit_val,
1338
+ )
1339
+ )
1340
+ case "Offset":
1341
+ proto = relation_proto.Relation(
1342
+ offset=relation_proto.Offset(
1343
+ input=map_logical_plan_relation(rel.child()),
1344
+ offset=rel.offsetExpr().value(),
1345
+ )
1346
+ )
1347
+ case "OneRowRelation":
1348
+ proto = relation_proto.Relation(project=relation_proto.Project())
1349
+ case "Pivot":
1350
+ pivot_column = map_logical_plan_expression(rel.pivotColumn())
1351
+ session = snowpark.Session.get_active_session()
1352
+ m = ColumnNameMap([], [], None)
1353
+
1354
+ pivot_values = [
1355
+ map_logical_plan_expression(e) for e in as_java_list(rel.pivotValues())
1356
+ ]
1357
+
1358
+ pivot_literals = []
1359
+
1360
+ for expr_proto in pivot_values:
1361
+ expr = map_single_column_expression(
1362
+ expr_proto, m, ExpressionTyper.dummy_typer(session)
1363
+ )
1364
+ value = session.range(1).select(expr[1].col).collect()[0][0]
1365
+ pivot_literals.append(
1366
+ expressions_proto.Expression.Literal(string=str(value))
1367
+ )
1368
+
1369
+ aggregate_expressions = [
1370
+ map_logical_plan_expression(e) for e in as_java_list(rel.aggregates())
1371
+ ]
1372
+
1373
+ proto = relation_proto.Relation(
1374
+ aggregate=relation_proto.Aggregate(
1375
+ input=map_logical_plan_relation(rel.child()),
1376
+ aggregate_expressions=aggregate_expressions,
1377
+ group_type=relation_proto.Aggregate.GroupType.GROUP_TYPE_PIVOT,
1378
+ pivot=relation_proto.Aggregate.Pivot(
1379
+ col=pivot_column, values=pivot_literals
1380
+ ),
1381
+ )
1382
+ )
1383
+
1384
+ case "PlanWithUnresolvedIdentifier":
1385
+ expr_proto = map_logical_plan_expression(rel.identifierExpr())
1386
+ session = snowpark.Session.get_active_session()
1387
+ m = ColumnNameMap([], [], None)
1388
+ expr = map_single_column_expression(
1389
+ expr_proto, m, ExpressionTyper.dummy_typer(session)
1390
+ )
1391
+ value = session.range(1).select(expr[1].col).collect()[0][0]
1392
+
1393
+ proto = relation_proto.Relation(
1394
+ read=relation_proto.Read(
1395
+ named_table=relation_proto.Read.NamedTable(
1396
+ unparsed_identifier=value,
1397
+ )
1398
+ )
1399
+ )
1400
+ case "Project":
1401
+ with push_sql_scope():
1402
+ input = map_logical_plan_relation(rel.child())
1403
+ expressions = [
1404
+ map_logical_plan_expression(e)
1405
+ for e in as_java_list(rel.projectList())
1406
+ ]
1407
+ proto = relation_proto.Relation(
1408
+ project=relation_proto.Project(
1409
+ input=input,
1410
+ expressions=expressions,
1411
+ )
1412
+ )
1413
+ case "Sort":
1414
+ proto = relation_proto.Relation(
1415
+ sort=relation_proto.Sort(
1416
+ input=map_logical_plan_relation(rel.child()),
1417
+ order=[
1418
+ map_logical_plan_expression(e).sort_order
1419
+ for e in as_java_list(rel.order())
1420
+ ],
1421
+ )
1422
+ )
1423
+ case "SubqueryAlias":
1424
+ alias = str(rel.alias())
1425
+ proto = relation_proto.Relation(
1426
+ subquery_alias=relation_proto.SubqueryAlias(
1427
+ input=map_logical_plan_relation(rel.child()),
1428
+ alias=alias,
1429
+ )
1430
+ )
1431
+ set_sql_plan_name(alias, plan_id)
1432
+ case "Union":
1433
+ children = as_java_list(rel.children())
1434
+ assert len(children) == 2, len(children)
1435
+
1436
+ proto = relation_proto.Relation(
1437
+ set_op=relation_proto.SetOperation(
1438
+ left_input=map_logical_plan_relation(children[0]),
1439
+ right_input=map_logical_plan_relation(children[1]),
1440
+ set_op_type=relation_proto.SetOperation.SET_OP_TYPE_UNION,
1441
+ by_name=rel.byName(),
1442
+ allow_missing_columns=rel.allowMissingCol(),
1443
+ )
1444
+ )
1445
+ case "Unpivot":
1446
+ value_column_names = [e for e in as_java_list(rel.valueColumnNames())]
1447
+ variable_column_name = rel.variableColumnName()
1448
+
1449
+ values = []
1450
+ for e1 in as_java_list(rel.values().get()):
1451
+ for e in as_java_list(e1):
1452
+ values.append(map_logical_plan_expression(e))
1453
+
1454
+ # Need to find ids which are not part of values and remaining cols of df
1455
+ input_rel = map_logical_plan_relation(rel.child())
1456
+ input_df: snowpark.DataFrame = map_relation(input_rel)
1457
+ column_map = input_df._column_map
1458
+ typer = ExpressionTyper(input_df)
1459
+ unpivot_spark_names = []
1460
+ for v in values:
1461
+ spark_name, typed_column = map_single_column_expression(
1462
+ v, column_map, typer
1463
+ )
1464
+ unpivot_spark_names.append(spark_name)
1465
+
1466
+ id_cols = []
1467
+ for column in input_df.columns:
1468
+ spark_column = (
1469
+ column_map.get_spark_column_name_from_snowpark_column_name(column)
1470
+ )
1471
+ if spark_column not in unpivot_spark_names:
1472
+ id_cols.append(
1473
+ expressions_proto.Expression(
1474
+ unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
1475
+ unparsed_identifier=spark_column
1476
+ )
1477
+ )
1478
+ )
1479
+
1480
+ proto = relation_proto.Relation(
1481
+ unpivot=relation_proto.Unpivot(
1482
+ input=input_rel,
1483
+ ids=id_cols,
1484
+ values=relation_proto.Unpivot.Values(values=values),
1485
+ variable_column_name=variable_column_name,
1486
+ value_column_name=value_column_names[0],
1487
+ )
1488
+ )
1489
+ case "UnresolvedHaving":
1490
+ proto = relation_proto.Relation(
1491
+ filter=relation_proto.Filter(
1492
+ input=map_logical_plan_relation(rel.child()),
1493
+ condition=map_logical_plan_expression(rel.havingCondition()),
1494
+ )
1495
+ )
1496
+ case "UnresolvedHint":
1497
+ proto = relation_proto.Relation(
1498
+ hint=relation_proto.Hint(
1499
+ input=map_logical_plan_relation(rel.child()),
1500
+ name=str(rel.name()),
1501
+ parameters=[
1502
+ map_logical_plan_expression(e)
1503
+ for e in as_java_list(rel.parameters())
1504
+ ],
1505
+ )
1506
+ )
1507
+ case "UnresolvedInlineTable":
1508
+ names = [str(name) for name in as_java_list(rel.names())]
1509
+ rows = (
1510
+ relation_proto.Relation(
1511
+ common=relation_proto.RelationCommon(
1512
+ plan_id=gen_sql_plan_id(),
1513
+ ),
1514
+ project=relation_proto.Project(
1515
+ expressions=(
1516
+ expressions_proto.Expression(
1517
+ alias=expressions_proto.Expression.Alias(
1518
+ expr=map_logical_plan_expression(val),
1519
+ name=[name],
1520
+ )
1521
+ )
1522
+ for name, val in zip(names, as_java_list(row))
1523
+ ),
1524
+ ),
1525
+ )
1526
+ for row in as_java_list(rel.rows())
1527
+ )
1528
+
1529
+ proto = reduce(
1530
+ lambda left, right: relation_proto.Relation(
1531
+ common=relation_proto.RelationCommon(
1532
+ plan_id=gen_sql_plan_id(),
1533
+ ),
1534
+ set_op=relation_proto.SetOperation(
1535
+ left_input=left,
1536
+ right_input=right,
1537
+ set_op_type=relation_proto.SetOperation.SET_OP_TYPE_UNION,
1538
+ is_all=True,
1539
+ ),
1540
+ ),
1541
+ rows,
1542
+ )
1543
+ case "UnresolvedRelation":
1544
+ name = str(rel.name())
1545
+ set_sql_plan_name(name, plan_id)
1546
+
1547
+ cte_proto = _ctes.get().get(name)
1548
+ if cte_proto is not None:
1549
+ # The name corresponds to a `WITH` alias rather than a table.
1550
+ # TODO: We currently evaluate the query each time its alias is used;
1551
+ # we should eventually start using `WITH` in Snowflake SQL.
1552
+ proto = cte_proto
1553
+ else:
1554
+ tmp_views = _get_current_temp_objects()
1555
+ current_schema = session.connection.schema
1556
+ from_table = (
1557
+ CURRENT_CATALOG_NAME,
1558
+ current_schema,
1559
+ name,
1560
+ )
1561
+ if from_table in tmp_views:
1562
+ _accessing_temp_object.set(True)
1563
+ proto = relation_proto.Relation(
1564
+ read=relation_proto.Read(
1565
+ named_table=relation_proto.Read.NamedTable(
1566
+ unparsed_identifier=name,
1567
+ )
1568
+ )
1569
+ )
1570
+ case "UnresolvedSubqueryColumnAliases":
1571
+ child = map_logical_plan_relation(rel.child())
1572
+ aliases = [str(a) for a in as_java_list(rel.outputColumnNames())]
1573
+ any_proto = Any()
1574
+ any_proto.Pack(
1575
+ snowflake_proto.Extension(
1576
+ subquery_column_aliases=snowflake_proto.SubqueryColumnAliases(
1577
+ input=child,
1578
+ aliases=aliases,
1579
+ )
1580
+ )
1581
+ )
1582
+ proto = relation_proto.Relation(extension=any_proto)
1583
+ case "UnresolvedTableValuedFunction":
1584
+ name = ".".join(str(part) for part in as_java_list(rel.name())).lower()
1585
+ args = [
1586
+ map_logical_plan_expression(exp)
1587
+ for exp in as_java_list(rel.functionArgs())
1588
+ ]
1589
+
1590
+ match name:
1591
+ case "range":
1592
+ m = ColumnNameMap([], [], None)
1593
+ session = snowpark.Session.get_active_session()
1594
+ args = (
1595
+ session.range(1)
1596
+ .select(
1597
+ [
1598
+ map_single_column_expression(arg, m, None)[1].col
1599
+ for arg in args
1600
+ ]
1601
+ )
1602
+ .collect()[0]
1603
+ )
1604
+
1605
+ start, step = 0, 1
1606
+ match args:
1607
+ case [_]:
1608
+ [end] = args
1609
+ case [_, _]:
1610
+ [start, end] = args
1611
+ case [_, _, _]:
1612
+ [start, end, step] = args
1613
+
1614
+ proto = relation_proto.Relation(
1615
+ range=relation_proto.Range(
1616
+ start=start,
1617
+ end=end,
1618
+ step=step,
1619
+ )
1620
+ )
1621
+ case udtf_name if udtf_name in snowpark.Session.get_active_session()._udtfs:
1622
+ # TODO: Table arguments are now expressions, too, so we shouldn't need to handle them here;
1623
+ # instead, handle SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG in relation.map_extension.
1624
+ table_args = []
1625
+ non_table_args = []
1626
+ for i, arg in enumerate(args):
1627
+ extension = snowflake_exp_proto.ExpExtension()
1628
+ if (
1629
+ arg.extension.Unpack(extension)
1630
+ and extension.subquery_expression.subquery_type
1631
+ == snowflake_exp_proto.SubqueryExpression.SUBQUERY_TYPE_TABLE_ARG
1632
+ ):
1633
+ table_args.append(
1634
+ snowflake_proto.TableArgumentInfo(
1635
+ table_argument=extension.subquery_expression.input,
1636
+ table_argument_idx=i,
1637
+ )
1638
+ )
1639
+ else:
1640
+ non_table_args.append(arg)
1641
+
1642
+ if table_args:
1643
+ any_proto = Any()
1644
+ any_proto.Pack(
1645
+ snowflake_proto.Extension(
1646
+ udtf_with_table_arguments=snowflake_proto.UDTFWithTableArguments(
1647
+ function_name=name,
1648
+ arguments=non_table_args,
1649
+ table_arguments=table_args,
1650
+ )
1651
+ )
1652
+ )
1653
+ proto = relation_proto.Relation(extension=any_proto)
1654
+ else:
1655
+ proto = relation_proto.Relation(
1656
+ project=relation_proto.Project(
1657
+ expressions=[
1658
+ expressions_proto.Expression(
1659
+ unresolved_function=expressions_proto.Expression.UnresolvedFunction(
1660
+ function_name=name,
1661
+ arguments=args,
1662
+ )
1663
+ )
1664
+ ],
1665
+ ),
1666
+ )
1667
+ case other:
1668
+ proto = relation_proto.Relation(
1669
+ project=relation_proto.Project(
1670
+ expressions=[
1671
+ expressions_proto.Expression(
1672
+ unresolved_function=expressions_proto.Expression.UnresolvedFunction(
1673
+ function_name=name,
1674
+ arguments=args,
1675
+ )
1676
+ )
1677
+ ],
1678
+ ),
1679
+ )
1680
+ case "UnresolvedWith":
1681
+ with _push_cte_scope():
1682
+ for cte in as_java_list(rel.cteRelations()):
1683
+ name = str(cte._1())
1684
+ cte_proto = map_logical_plan_relation(cte._2())
1685
+ _ctes.get()[name] = cte_proto
1686
+
1687
+ proto = map_logical_plan_relation(rel.child())
1688
+ case "LateralJoin":
1689
+ left = map_logical_plan_relation(rel.left())
1690
+ right = map_logical_plan_relation(rel.right().plan())
1691
+ any_proto = Any()
1692
+ any_proto.Pack(
1693
+ snowflake_proto.Extension(
1694
+ lateral_join=snowflake_proto.LateralJoin(
1695
+ left=left,
1696
+ right=right,
1697
+ )
1698
+ )
1699
+ )
1700
+ proto = relation_proto.Relation(extension=any_proto)
1701
+ case "WithWindowDefinition":
1702
+ map_obj = as_java_map(rel.windowDefinitions())
1703
+ with _push_window_specs_scope():
1704
+ for key, window_spec in map_obj.items():
1705
+ _window_specs.get()[key] = window_spec
1706
+ proto = map_logical_plan_relation(rel.child())
1707
+ case "Generate":
1708
+ input_relation = map_logical_plan_relation(rel.child())
1709
+ generator_output_list = as_java_list(rel.generatorOutput())
1710
+ generator_output_list_expressions = [
1711
+ map_logical_plan_expression(e) for e in generator_output_list
1712
+ ]
1713
+ qualifier = rel.qualifier().get() if rel.qualifier().isDefined() else None
1714
+ function_name = rel.generator().name().toString()
1715
+ func_arguments = [
1716
+ map_logical_plan_expression(e)
1717
+ for e in as_java_list(rel.generator().children())
1718
+ ]
1719
+ unresolved_fun_proto = expressions_proto.Expression.UnresolvedFunction(
1720
+ function_name=function_name, arguments=func_arguments
1721
+ )
1722
+
1723
+ aliased_proto = unresolved_fun_proto
1724
+ if generator_output_list.size() > 0:
1725
+ aliased_proto = expressions_proto.Expression(
1726
+ alias=expressions_proto.Expression.Alias(
1727
+ expr=expressions_proto.Expression(
1728
+ unresolved_function=unresolved_fun_proto,
1729
+ ),
1730
+ name=[attribute.name() for attribute in generator_output_list],
1731
+ )
1732
+ )
1733
+
1734
+ # TODO: Fix the bug in snowpark where if we select posexplode with *, it would return columns
1735
+ # generated by posexplode two times plus all the other columns
1736
+ # Ideal way should have been to do this
1737
+ # unresolved_star_expr = expressions_proto.Expression(
1738
+ # unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
1739
+ # unparsed_identifier="*",
1740
+ # )
1741
+ # )
1742
+ # generator_dataframe_proto.project.expressions.append(
1743
+ # unresolved_star_expr
1744
+ # )
1745
+
1746
+ # This is a workaround to fix the bug in snowpark where if we select posexplode with *, it would return wrong columns
1747
+ input_df = map_relation(input_relation)
1748
+ spark_columns = input_df._column_map.get_spark_columns()
1749
+ column_expressions = [
1750
+ expressions_proto.Expression(
1751
+ unresolved_attribute=expressions_proto.Expression.UnresolvedAttribute(
1752
+ unparsed_identifier=spark_column
1753
+ )
1754
+ )
1755
+ for spark_column in spark_columns
1756
+ ]
1757
+
1758
+ generator_dataframe_proto = relation_proto.Relation(
1759
+ project=relation_proto.Project(
1760
+ input=input_relation,
1761
+ expressions=[aliased_proto, *column_expressions],
1762
+ )
1763
+ )
1764
+ if qualifier is not None and qualifier.lower() != "as":
1765
+ generator_dataframe_proto = relation_proto.Relation(
1766
+ with_columns=relation_proto.WithColumns(
1767
+ input=generator_dataframe_proto,
1768
+ aliases=[
1769
+ expressions_proto.Expression.Alias(
1770
+ expr=expressions_proto.Expression(
1771
+ unresolved_function=expressions_proto.Expression.UnresolvedFunction(
1772
+ function_name="struct",
1773
+ arguments=generator_output_list_expressions,
1774
+ )
1775
+ ),
1776
+ name=[qualifier],
1777
+ )
1778
+ ],
1779
+ )
1780
+ )
1781
+ proto = generator_dataframe_proto
1782
+ case other:
1783
+ raise SnowparkConnectNotImplementedError(f"Unimplemented relation: {other}")
1784
+
1785
+ proto.common.plan_id = plan_id
1786
+
1787
+ return proto
1788
+
1789
+
1790
def get_relation_identifier_name(name_obj, is_multi_part: bool = False) -> str:
    """Resolve the name carried by a parsed relation-identifier node.

    Args:
        name_obj: Java plan node. Either a ``PlanWithUnresolvedIdentifier``
            (SQL ``IDENTIFIER(<table_name>)``) or a node exposing
            ``multipartIdentifier()`` / ``nameParts()``.
        is_multi_part: For nodes that are not ``PlanWithUnresolvedIdentifier``,
            read the parts from ``multipartIdentifier()`` when True and from
            ``nameParts()`` otherwise.

    Returns:
        For ``PlanWithUnresolvedIdentifier``, the value obtained by evaluating
        the identifier expression in the active session; otherwise the result
        of ``_spark_to_snowflake`` applied to the node's name parts.
    """
    node_kind = name_obj.getClass().getSimpleName()

    if node_kind != "PlanWithUnresolvedIdentifier":
        # Plain identifier: pick the accessor the caller asked for.
        if is_multi_part:
            return _spark_to_snowflake(name_obj.multipartIdentifier())
        return _spark_to_snowflake(name_obj.nameParts())

    # IDENTIFIER(<table_name>): the name is an expression, so map it to a
    # column and evaluate it against a one-row range to get its value.
    identifier_proto = map_logical_plan_expression(name_obj.identifierExpr())
    active_session = snowpark.Session.get_active_session()
    empty_column_map = ColumnNameMap([], [], None)
    resolved = map_single_column_expression(
        identifier_proto,
        empty_column_map,
        ExpressionTyper.dummy_typer(active_session),
    )
    result_rows = active_session.range(1).select(resolved[1].col).collect()
    return result_rows[0][0]
1807
+
1808
+
1809
+ def _convert_spark_pattern_to_regex(pattern: str) -> str:
1810
+ """
1811
+ Convert Spark LIKE pattern to Python regex pattern.
1812
+
1813
+ In Spark LIKE patterns:
1814
+ - '*' matches 0 or more characters (equivalent to '.*' in regex)
1815
+ - '|' is used to separate multiple patterns (equivalent to '|' in regex)
1816
+ - Everything else works like regular regex patterns
1817
+
1818
+ Args:
1819
+ pattern: Spark LIKE pattern string
1820
+
1821
+ Returns:
1822
+ Python regex pattern string
1823
+ """
1824
+ if not pattern:
1825
+ return ""
1826
+
1827
+ # Split by '|' to handle multiple patterns
1828
+ patterns = pattern.split("|")
1829
+ regex_patterns = []
1830
+
1831
+ for p in patterns:
1832
+ p = p.strip()
1833
+ # Replace * with .* for wildcard matching, but preserve other regex characters
1834
+ # We need to be careful to only replace standalone * not part of other patterns
1835
+ converted = p.replace("*", ".*")
1836
+ regex_patterns.append(converted)
1837
+
1838
+ # Join patterns with | for OR matching
1839
+ return "|".join(regex_patterns)
1840
+
1841
+
1842
def _filter_tables_by_pattern(tables: list, pattern: str) -> list:
    """
    Filter table list by Spark LIKE pattern.

    The pattern must match the whole table name, case-insensitively. For
    example 'sam' selects only a table named ``sam`` (not ``sample``), while
    'sam*' selects every table whose name starts with ``sam``.

    Args:
        tables: List of table rows from Snowflake SHOW TABLES
        pattern: Spark LIKE pattern

    Returns:
        Filtered list of tables matching the pattern. ``tables`` is returned
        unchanged when ``pattern`` or ``tables`` is empty, or when the pattern
        converts to an empty regex.

    Raises:
        re.error: If the converted pattern is not a valid regular expression.
    """
    if not pattern or not tables:
        return tables

    regex_pattern = _convert_spark_pattern_to_regex(pattern)
    if not regex_pattern:
        return tables

    # Compile regex for case-insensitive matching (as per Spark docs)
    compiled_regex = re.compile(regex_pattern, re.IGNORECASE)

    def _table_name(table) -> str:
        # Table name is typically the second column in SHOW TABLES output
        return table[1] if len(table) > 1 else str(table[0])

    # fullmatch, not search: a substring hit must not select the table.
    # Alternatives separated by '|' are each tried against the whole name.
    return [
        table for table in tables if compiled_regex.fullmatch(_table_name(table))
    ]
1871
+
1872
+
1873
+ def _escape_sql_comment(comment: str) -> str:
1874
+ return str(comment).replace("'", "''").replace("\\", "\\\\")