snowpark-connect 0.20.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879)
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1136 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ # Some content in this file is derived from Apache Spark. In accordance
6
+ # with Apache 2 license, the license for Apache Spark is as follows:
7
+ #
8
+ # Licensed to the Apache Software Foundation (ASF) under one or more
9
+ # contributor license agreements. See the NOTICE file distributed with
10
+ # this work for additional information regarding copyright ownership.
11
+ # The ASF licenses this file to You under the Apache License, Version 2.0
12
+ # (the "License"); you may not use this file except in compliance with
13
+ # the License. You may obtain a copy of the License at
14
+ #
15
+ # http://www.apache.org/licenses/LICENSE-2.0
16
+ #
17
+ # Unless required by applicable law or agreed to in writing, software
18
+ # distributed under the License is distributed on an "AS IS" BASIS,
19
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
+ # See the License for the specific language governing permissions and
21
+ # limitations under the License.
22
+ #
23
+
24
+ import atexit
25
+ import logging
26
+ import os
27
+ import pathlib
28
+ import socket
29
+ import tempfile
30
+ import threading
31
+ import urllib.parse
32
+ import zipfile
33
+ from concurrent import futures
34
+ from typing import Any, Callable, Dict, List, Optional, Tuple
35
+
36
+ import grpc
37
+ import jpype
38
+ import pyspark
39
+ import pyspark.sql.connect.proto.base_pb2 as proto_base
40
+ import pyspark.sql.connect.proto.base_pb2_grpc as proto_base_grpc
41
+ import pyspark.sql.connect.proto.common_pb2 as common_proto
42
+ import pyspark.sql.connect.proto.relations_pb2 as relations_proto
43
+ import pyspark.sql.connect.proto.types_pb2 as types_proto
44
+ from packaging import version
45
+ from pyspark import StorageLevel
46
+ from pyspark.conf import SparkConf
47
+ from pyspark.errors import PySparkValueError
48
+ from pyspark.sql.connect.client.core import ChannelBuilder
49
+ from pyspark.sql.connect.session import SparkSession
50
+
51
+ import snowflake.snowpark_connect
52
+ import snowflake.snowpark_connect.proto.control_pb2_grpc as control_grpc
53
+ import snowflake.snowpark_connect.tcm as tcm
54
+ from snowflake import snowpark
55
+ from snowflake.snowpark_connect.analyze_plan.map_tree_string import map_tree_string
56
+ from snowflake.snowpark_connect.config import route_config_proto
57
+ from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
58
+ from snowflake.snowpark_connect.control_server import ControlServicer
59
+ from snowflake.snowpark_connect.error.error_utils import build_grpc_error_response
60
+ from snowflake.snowpark_connect.execute_plan.map_execution_command import (
61
+ map_execution_command,
62
+ )
63
+ from snowflake.snowpark_connect.execute_plan.map_execution_root import (
64
+ map_execution_root,
65
+ )
66
+ from snowflake.snowpark_connect.relation.map_local_relation import map_local_relation
67
+ from snowflake.snowpark_connect.relation.map_relation import map_relation
68
+ from snowflake.snowpark_connect.relation.utils import get_semantic_string
69
+ from snowflake.snowpark_connect.resources_initializer import initialize_resources_async
70
+ from snowflake.snowpark_connect.type_mapping import (
71
+ parse_ddl_string,
72
+ snowpark_to_proto_type,
73
+ )
74
+ from snowflake.snowpark_connect.utils.artifacts import check_checksum, write_artifact
75
+ from snowflake.snowpark_connect.utils.cache import (
76
+ df_cache_map_get,
77
+ df_cache_map_pop,
78
+ df_cache_map_put_if_absent,
79
+ )
80
+ from snowflake.snowpark_connect.utils.context import (
81
+ clear_context_data,
82
+ get_session_id,
83
+ set_session_id,
84
+ set_spark_version,
85
+ )
86
+ from snowflake.snowpark_connect.utils.interrupt import (
87
+ interrupt_all_queries,
88
+ interrupt_queries_with_tag,
89
+ interrupt_query,
90
+ )
91
+ from snowflake.snowpark_connect.utils.profiling import profile_method
92
+ from snowflake.snowpark_connect.utils.session import (
93
+ configure_snowpark_session,
94
+ get_or_create_snowpark_session,
95
+ set_query_tags,
96
+ )
97
+ from snowflake.snowpark_connect.utils.snowpark_connect_logging import (
98
+ log_waring_once_storage_level,
99
+ logger,
100
+ )
101
+ from snowflake.snowpark_connect.utils.telemetry import (
102
+ SnowparkConnectNotImplementedError,
103
+ telemetry,
104
+ )
105
+ from snowflake.snowpark_connect.utils.xxhash64 import xxhash64_string
106
+
107
+ DEFAULT_PORT = 15002
108
+
109
+ # https://github.com/apache/spark/blob/v3.5.3/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/config/ConnectCommon.scala#L21
110
+ _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = 128 * 1024 * 1024
111
+ # TODO: Verify whether we want to configure it via env variables.
112
+ _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE = 64 * 1024 # 64kb
113
+
114
+
115
def _handle_exception(context, e: Exception):
    """Report a failed request and surface the error to the caller.

    The traceback of the exception currently being handled is printed, the
    failure is reported to telemetry, and then:

    * in TCM mode the exception ``e`` is re-raised as-is;
    * otherwise a gRPC error response is built from ``e`` and the RPC is
      aborted through ``context.abort_with_status``.

    Args:
        context: The gRPC servicer context of the failing call.
        e: The exception that caused the request to fail.
    """
    from traceback import print_exc

    print_exc()
    telemetry.report_request_failure(e)

    if not tcm.TCM_MODE:
        # Imported lazily, only on the path that actually needs grpc_status.
        from grpc_status import rpc_status

        grpc_status_obj = rpc_status.to_status(build_grpc_error_response(e))
        context.abort_with_status(grpc_status_obj)
        return

    # TODO: SNOW-2009834 gracefully return error back in TCM
    raise e
130
+
131
+
132
+ class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
133
def __init__(
    self,
    log_request_fn: Optional[Callable[[bytearray], None]] = None,
) -> None:
    """Create the servicer and start resource initialization.

    Args:
        log_request_fn: Optional callback; when set, ``ExecutePlan`` and
            ``AnalyzePlan`` pass it the serialized bytes of each incoming
            request.
    """
    self.log_request_fn = log_request_fn

    # Trigger async initialization here, so that we reduce overhead for rpc calls.
    initialize_resources_async()
140
+
141
@profile_method
def ExecutePlan(self, request: proto_base.ExecutePlanRequest, context):
    """Run the plan carried by ``request`` and stream back the responses.

    This is a generator: the responses produced for the plan are yielded
    first, followed by one final ``ExecutePlanResponse`` that carries
    ``result_complete``. Any exception is handed to ``_handle_exception``,
    and the request summary telemetry is sent in all cases.

    Per the upstream Spark Connect contract, at least one ARROW batch is
    expected even for an empty result set; that guarantee is provided by the
    mapping helpers, not by this method itself (NOTE(review): confirm).
    """
    logger.info("ExecutePlan")
    request_logger = self.log_request_fn
    if request_logger is not None:
        request_logger(request.SerializeToString())

    # TODO: remove session id context when we host this in Snowflake server
    # set the thread-local context of session id
    clear_context_data()
    set_session_id(request.session_id)
    set_spark_version(request.client_type)
    telemetry.initialize_request_summary(request)

    set_query_tags(request.tags)

    # Default to an empty stream; plans that are neither "root" nor
    # "command" only produce the final result_complete message.
    responses = iter(())
    try:
        op_type = request.plan.WhichOneof("op_type")
        if op_type == "root":
            logger.info("ROOT")
            responses = map_execution_root(request)
        elif op_type == "command":
            logger.info("COMMAND")
            single_response = map_execution_command(request)
            if single_response is not None:
                responses = iter([single_response])

        yield from responses
        yield proto_base.ExecutePlanResponse(
            session_id=request.session_id,
            operation_id=SERVER_SIDE_SESSION_ID,
            result_complete=proto_base.ExecutePlanResponse.ResultComplete(),
        )
    except Exception as e:
        _handle_exception(context, e)
    finally:
        telemetry.send_request_summary_telemetry()
182
+
183
+ @profile_method
184
+ def AnalyzePlan(self, request: proto_base.AnalyzePlanRequest, context):
185
+ """Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query."""
186
+ logger.info(f"AnalyzePlan: {request.WhichOneof('analyze')}")
187
+ if self.log_request_fn is not None:
188
+ self.log_request_fn(request.SerializeToString())
189
+ try:
190
+ # TODO: remove session id context when we host this in Snowflake server
191
+ # set the thread-local context of session id
192
+ clear_context_data()
193
+ set_session_id(request.session_id)
194
+ set_spark_version(request.client_type)
195
+ telemetry.initialize_request_summary(request)
196
+ match request.WhichOneof("analyze"):
197
+ case "schema":
198
+ snowpark_df = map_relation(request.schema.plan.root)
199
+ snowpark_schema: snowpark.types.StructType = snowpark_df.schema
200
+ schema = proto_base.AnalyzePlanResponse.Schema(
201
+ schema=types_proto.DataType(
202
+ **snowpark_to_proto_type(
203
+ snowpark_schema, snowpark_df._column_map, snowpark_df
204
+ )
205
+ )
206
+ )
207
+ return proto_base.AnalyzePlanResponse(
208
+ session_id=request.session_id,
209
+ schema=schema,
210
+ )
211
+ case "tree_string":
212
+ return map_tree_string(request)
213
+ case "is_local":
214
+ return proto_base.AnalyzePlanResponse(
215
+ session_id=request.session_id,
216
+ is_local=proto_base.AnalyzePlanResponse.IsLocal(is_local=False),
217
+ )
218
+ case "ddl_parse":
219
+ return proto_base.AnalyzePlanResponse(
220
+ session_id=request.session_id,
221
+ ddl_parse=proto_base.AnalyzePlanResponse.DDLParse(
222
+ parsed=parse_ddl_string(request.ddl_parse.ddl_string)
223
+ ),
224
+ )
225
+ case "get_storage_level":
226
+ return proto_base.AnalyzePlanResponse(
227
+ session_id=request.session_id,
228
+ get_storage_level=proto_base.AnalyzePlanResponse.GetStorageLevel(
229
+ storage_level=common_proto.StorageLevel(
230
+ use_disk=True, use_memory=True
231
+ )
232
+ ),
233
+ )
234
+ case "persist":
235
+ plan_id = request.persist.relation.common.plan_id
236
+ # cache the plan if it is not already in the map
237
+
238
+ df_cache_map_put_if_absent(
239
+ (request.session_id, plan_id),
240
+ lambda: map_relation(request.persist.relation),
241
+ materialize=True,
242
+ )
243
+
244
+ storage_level = request.persist.storage_level
245
+ if storage_level != StorageLevel.DISK_ONLY:
246
+ log_waring_once_storage_level(storage_level)
247
+
248
+ return proto_base.AnalyzePlanResponse(
249
+ session_id=request.session_id,
250
+ persist=proto_base.AnalyzePlanResponse.Persist(),
251
+ )
252
+ case "unpersist":
253
+ plan_id = request.persist.relation.common.plan_id
254
+ # unpersist the cached plan
255
+ df_cache_map_pop((request.session_id, plan_id))
256
+
257
+ return proto_base.AnalyzePlanResponse(
258
+ session_id=request.session_id,
259
+ unpersist=proto_base.AnalyzePlanResponse.Unpersist(),
260
+ )
261
+ case "explain":
262
+ # Snowflake only exposes simplified execution plans, similar to Spark's optimized logical plans.
263
+ # Snowpark provides the execution plan IFF the dataframe maps to a single query.
264
+ # TODO: Do we need to return a Spark-like plan?
265
+ snowpark_df = map_relation(request.explain.plan.root)
266
+ return proto_base.AnalyzePlanResponse(
267
+ session_id=request.session_id,
268
+ explain=proto_base.AnalyzePlanResponse.Explain(
269
+ explain_string=snowpark_df._explain_string()
270
+ ),
271
+ )
272
+ case "spark_version":
273
+ return proto_base.AnalyzePlanResponse(
274
+ session_id=request.session_id,
275
+ spark_version=proto_base.AnalyzePlanResponse.SparkVersion(
276
+ version=pyspark.__version__
277
+ ),
278
+ )
279
+ case "same_semantics":
280
+ target_queries_hash = xxhash64_string(
281
+ get_semantic_string(request.same_semantics.target_plan.root)
282
+ )
283
+ other_queries_hash = xxhash64_string(
284
+ get_semantic_string(request.same_semantics.other_plan.root)
285
+ )
286
+ return proto_base.AnalyzePlanResponse(
287
+ session_id=request.session_id,
288
+ same_semantics=proto_base.AnalyzePlanResponse.SameSemantics(
289
+ result=target_queries_hash == other_queries_hash
290
+ ),
291
+ )
292
+ case "semantic_hash":
293
+ queries_str = get_semantic_string(request.semantic_hash.plan.root)
294
+ return proto_base.AnalyzePlanResponse(
295
+ session_id=request.session_id,
296
+ semantic_hash=proto_base.AnalyzePlanResponse.SemanticHash(
297
+ result=xxhash64_string(queries_str)
298
+ & 0x7FFFFFFF # need a 32 bit int here.
299
+ ),
300
+ )
301
+ case "is_streaming":
302
+ return proto_base.AnalyzePlanResponse(
303
+ session_id=request.session_id,
304
+ is_streaming=proto_base.AnalyzePlanResponse.IsStreaming(
305
+ is_streaming=False
306
+ ),
307
+ )
308
+ case "input_files":
309
+ files = []
310
+ if request.input_files.plan.root.HasField("read"):
311
+ files = _get_files_metadata(
312
+ request.input_files.plan.root.read.data_source
313
+ )
314
+ elif request.input_files.plan.root.HasField("join"):
315
+ left_files = _get_files_metadata(
316
+ request.input_files.plan.root.join.left.read.data_source
317
+ )
318
+ right_files = _get_files_metadata(
319
+ request.input_files.plan.root.join.right.read.data_source
320
+ )
321
+ files = left_files + right_files
322
+ return proto_base.AnalyzePlanResponse(
323
+ session_id=request.session_id,
324
+ input_files=proto_base.AnalyzePlanResponse.InputFiles(
325
+ files=list(set(files))
326
+ ),
327
+ )
328
+ case _:
329
+ raise SnowparkConnectNotImplementedError(
330
+ f"ANALYZE PLAN NOT IMPLEMENTED:\n{request}"
331
+ )
332
+ except Exception as e:
333
+ _handle_exception(context, e)
334
+ finally:
335
+ telemetry.send_request_summary_telemetry()
336
+
337
@staticmethod
def Config(
    request: proto_base.ConfigRequest,
    context,
    options=(),
    channel_credentials=None,
    call_credentials=None,
    insecure=False,
    compression=None,
    wait_for_ready=None,
    timeout=None,
    metadata=None,
):
    """Update or fetch the configurations and returns a [[ConfigResponse]] containing the result.

    The request is routed through ``route_config_proto`` together with the
    Snowpark session from ``get_or_create_snowpark_session``. Failures are
    handed to ``_handle_exception``; the request summary telemetry is sent
    in all cases. Parameters other than ``request`` and ``context`` are not
    used by this implementation.
    """
    logger.info("Config")
    try:
        telemetry.initialize_request_summary(request)
        snowpark_session = get_or_create_snowpark_session()
        config_response = route_config_proto(request, snowpark_session)
    except Exception as e:
        _handle_exception(context, e)
    else:
        return config_response
    finally:
        telemetry.send_request_summary_telemetry()
359
+
360
+ def AddArtifacts(self, request_iterator, context):
361
+ """Add artifacts to the session and returns a [[AddArtifactsResponse]] containing metadata about
362
+ the added artifacts.
363
+ """
364
+ logger.info("AddArtifacts")
365
+ session: snowpark.Session = get_or_create_snowpark_session()
366
+ filenames: dict[str, str] = {}
367
+ response: dict[str, proto_base.AddArtifactsResponse.ArtifactSummary] = {}
368
+ # Store accumulated data for local relation cache
369
+ cache_data: dict[str, bytearray] = {}
370
+
371
+ def _try_handle_local_relation(artifact_name: str, data: bytes):
372
+ """
373
+ Attempt to deserialize the artifact data to a LocalRelation protobuf message.
374
+ LocalRelation messages represent in-memory data that should be materialized
375
+ in temporary table in Snowflake rather than stored as file artifact.
376
+ - If successful: creates a temporary table and caches the DataFrame in `df_cache_map`
377
+ - If unsuccessful: falls back to storing as a regular file artifact
378
+ """
379
+
380
+ is_likely_local_relation = artifact_name.startswith(
381
+ "cache/"
382
+ ) # heuristic to identify local relations
383
+
384
+ def _handle_regular_artifact():
385
+ filenames[artifact_name] = write_artifact(
386
+ session,
387
+ artifact_name,
388
+ data,
389
+ overwrite=True,
390
+ )
391
+
392
+ if is_likely_local_relation:
393
+ try:
394
+ l_relation = relations_proto.LocalRelation()
395
+ l_relation.ParseFromString(data)
396
+ relation = relations_proto.Relation(local_relation=l_relation)
397
+ df_cache_map_put_if_absent(
398
+ (get_session_id(), artifact_name.replace("cache/", "")),
399
+ lambda: map_local_relation(relation), # noqa: B023
400
+ materialize=True,
401
+ )
402
+ except Exception:
403
+ # fallback - treat as regular artifact
404
+ _handle_regular_artifact()
405
+ else:
406
+ # Not a LocalRelation - treat as regular artifact
407
+ _handle_regular_artifact()
408
+
409
+ # Spark sends artifacts as iterators that are either chunked or a full batch.
410
+ #
411
+ # Chunked artifacts start with a "begin_chunk" followed by a series of "chunk"
412
+ # messages. The "chunk" messages do not contain a name, so we store the name
413
+ # in `current_name` so we can append all the chunks to the same object.
414
+ # Chunked artifacts are written incrementally as gzip files to reduce memory
415
+ # issues.
416
+ #
417
+ # Batch artifacts are sent as a single "batch" message containing a list of
418
+ # artifacts. We do not need to keep track of the name since it is included in
419
+ # each artifact.
420
+ current_name: str = ""
421
+ for request in request_iterator:
422
+ clear_context_data()
423
+ set_session_id(request.session_id)
424
+ set_spark_version(request.client_type)
425
+ match request.WhichOneof("payload"):
426
+ case "begin_chunk":
427
+ current_name = request.begin_chunk.name
428
+ assert (
429
+ current_name not in filenames
430
+ ), "Duplicate artifact name found."
431
+
432
+ if current_name.startswith("cache/"):
433
+ cache_data[current_name] = bytearray(
434
+ request.begin_chunk.initial_chunk.data
435
+ )
436
+ else:
437
+ filenames[current_name] = write_artifact(
438
+ session,
439
+ current_name,
440
+ request.begin_chunk.initial_chunk.data,
441
+ overwrite=True,
442
+ )
443
+ response[
444
+ current_name
445
+ ] = proto_base.AddArtifactsResponse.ArtifactSummary(
446
+ name=current_name,
447
+ is_crc_successful=check_checksum(
448
+ request.begin_chunk.initial_chunk.data,
449
+ request.begin_chunk.initial_chunk.crc,
450
+ ),
451
+ )
452
+ case "chunk":
453
+ if current_name.startswith("cache/"):
454
+ cache_data[current_name].extend(request.chunk.data)
455
+ else:
456
+ assert filenames[current_name] == write_artifact(
457
+ session, current_name, request.chunk.data
458
+ ), "Artifact staging error."
459
+
460
+ response[
461
+ current_name
462
+ ] = proto_base.AddArtifactsResponse.ArtifactSummary(
463
+ name=current_name,
464
+ is_crc_successful=response[current_name].is_crc_successful
465
+ and check_checksum(request.chunk.data, request.chunk.crc),
466
+ )
467
+ case "batch":
468
+ for artifact in request.batch.artifacts:
469
+ data = artifact.data.data
470
+
471
+ _try_handle_local_relation(artifact.name, data)
472
+ response[
473
+ artifact.name
474
+ ] = proto_base.AddArtifactsResponse.ArtifactSummary(
475
+ name=artifact.name,
476
+ is_crc_successful=check_checksum(
477
+ artifact.data.data, artifact.data.crc
478
+ ),
479
+ )
480
+ case _:
481
+ raise ValueError(
482
+ f"Unexpected payload type in AddArtifacts: {request.WhichOneof('payload')}"
483
+ )
484
+
485
+ for name, data in cache_data.items():
486
+ _try_handle_local_relation(name, bytes(data))
487
+
488
+ for (name, filepath) in filenames.items():
489
+ session.file.put(
490
+ filepath,
491
+ session.get_session_stage(),
492
+ auto_compress=False,
493
+ overwrite=True,
494
+ source_compression="GZIP" if name.endswith(".gz") else "NONE",
495
+ )
496
+
497
+ if name.startswith("cache"):
498
+ continue
499
+
500
+ # Remove temporary stored files which are put on the stage
501
+ os.remove(filepath)
502
+
503
+ # Add only files marked to be used in user generated Python UDFs.
504
+ cached_name = f"{session.get_session_stage()}/{filepath.split('/')[-1]}"
505
+ if not name.startswith("pyfiles") and cached_name in session._python_files:
506
+ session._python_files.remove(cached_name)
507
+ elif name.startswith("pyfiles"):
508
+ session._python_files.add(cached_name)
509
+
510
+ if not name.startswith("pyfiles"):
511
+ session._import_files.add(cached_name)
512
+
513
+ return proto_base.AddArtifactsResponse(artifacts=list(response.values()))
514
+
515
def ArtifactStatus(self, request, context):
    """Check statuses of artifacts in the session and return them in an [[ArtifactStatusesResponse]].

    A requested name is reported as existing when it is a ``cache/`` entry
    present in the DataFrame cache, or when its last path component is
    contained in the name of any file found under the session's ``sas-<id>``
    temporary directory.
    """
    logger.info("ArtifactStatus")
    clear_context_data()
    set_session_id(request.session_id)
    set_spark_version(request.client_type)
    session: snowpark.Session = get_or_create_snowpark_session()
    tmp_root = "/tmp" if os.name != "nt" else tempfile.gettempdir()
    tmp_path = f"{tmp_root}/sas-{session.session_id}/"

    def _is_local_relation_cached(name: str) -> bool:
        if not name.startswith("cache/"):
            return False
        relation_hash = name.replace("cache/", "")
        return df_cache_map_get((get_session_id(), relation_hash)) is not None

    # Flatten the names of every file below the temporary directory.
    staged_files = [
        filename
        for _, _, filenames in os.walk(tmp_path)
        for filename in filenames
    ]

    statuses = {}
    for name in request.names:
        # With no staged files `any()` is False, so only the cache lookup counts.
        found = _is_local_relation_cached(name) or any(
            name.split("/")[-1] in staged for staged in staged_files
        )
        statuses[name] = proto_base.ArtifactStatusesResponse.ArtifactStatus(
            exists=found
        )
    return proto_base.ArtifactStatusesResponse(statuses=statuses)
556
+
557
def Interrupt(self, request: proto_base.InterruptRequest, context):
    """Interrupt running executions and return the ids that were interrupted.

    SAS doesn't support operation ids yet (a constant SERVER_SIDE_SESSION_ID
    mock is used), so Snowflake query ids stand in for them:
    - the returned ``interrupted_ids`` are query ids of interrupted jobs;
    - INTERRUPT_TYPE_OPERATION_ID expects a Snowflake query id.
    """
    logger.info("Interrupt")
    telemetry.initialize_request_summary(request)
    try:
        interrupt_types = proto_base.InterruptRequest.InterruptType
        requested = request.interrupt_type
        if requested == interrupt_types.INTERRUPT_TYPE_ALL:
            interrupted_ids = interrupt_all_queries()
        elif requested == interrupt_types.INTERRUPT_TYPE_TAG:
            interrupted_ids = interrupt_queries_with_tag(request.operation_tag)
        elif requested == interrupt_types.INTERRUPT_TYPE_OPERATION_ID:
            interrupted_ids = interrupt_query(request.operation_id)
        else:
            raise SnowparkConnectNotImplementedError(
                f"INTERRUPT NOT IMPLEMENTED:\n{request}"
            )

        return proto_base.InterruptResponse(
            session_id=request.session_id,
            interrupted_ids=interrupted_ids,
        )
    except Exception as exc:
        _handle_exception(context, exc)
    finally:
        telemetry.send_request_summary_telemetry()
586
+
587
def ReattachExecute(self, request: proto_base.ReattachExecuteRequest, context):
    """Reattach to an existing reattachable execution.

    Reattaching is not implemented by this server: the call is logged and
    always fails, asking the client to resubmit its request.

    Raises:
        SnowparkConnectNotImplementedError: always.
    """
    logger.info("ReattachExecute")
    raise SnowparkConnectNotImplementedError(
        "Spark client has detached, please resubmit request. In a future version, the server will support reattach."
    )
597
+
598
def ReleaseExecute(self, request: proto_base.ReleaseExecuteRequest, context):
    """Release a reattachable execution, or parts thereof.

    This implementation only acknowledges the request: it answers with the
    caller's session id and the constant SERVER_SIDE_SESSION_ID as operation
    id. Exceptions are forwarded to ``_handle_exception``.
    """
    try:
        logger.info("ReleaseExecute")
        release_response = proto_base.ReleaseExecuteResponse(
            session_id=request.session_id,
            operation_id=SERVER_SIDE_SESSION_ID,
        )
        return release_response
    except Exception as exc:
        _handle_exception(context, exc)
612
+
613
+ # TODO: These are required in Spark 4.x.
614
+ # def ReleaseSession(self, request, context):
615
+ # """Release a session.
616
+ # All the executions in the session will be released. Any further requests for the session with
617
+ # that session_id for the given user_id will fail. If the session didn't exist or was already
618
+ # released, this is a noop.
619
+ # """
620
+ # logger.info("ReleaseSession")
621
+ # return super().ReleaseSession(request, context)
622
+ #
623
+ # def FetchErrorDetails(self, request, context):
624
+ # """FetchErrorDetails retrieves the matched exception with details based on a provided error id."""
625
+ # logger.info("FetchErrorDetails")
626
+ # return super().FetchErrorDetails(request, context)
627
+
628
+
629
# Global state related to server connection
# Set by `_serve` once the gRPC server has started, and also when startup
# fails, so that a `start_session` call waiting on it is unblocked.
_server_running: threading.Event = threading.Event()
# Set to True by `_serve` when it fails with an exception.
_server_error: bool = False
# Address the gRPC server listens on; None until one of the `_set_*` helpers runs.
_server_url: Optional[str] = None
# URL handed to Spark clients; None until one of the `_set_*` helpers runs.
_client_url: Optional[str] = None
634
+
635
+
636
def _reset_server_run_state():
    """Reset the server's global state to its initial blank-slate values.

    Used when an error happens during server startup, after that error has
    been caught and handled / logged.
    """
    global _server_error, _server_url, _client_url
    _server_running.clear()
    _server_url = _client_url = None
    _server_error = False
644
+
645
+
646
def _stop_server(stop_event: threading.Event, server: grpc.Server):
    """Wait for ``stop_event``, then stop ``server`` and reset the run state.

    Blocks the calling thread until the event is set; ``server.stop`` is then
    called with 0 as its grace argument.
    """
    grace = 0
    stop_event.wait()
    server.stop(grace)
    _reset_server_run_state()
    logger.info("server stop sent")
651
+
652
+
653
def _serve(
    stop_event: Optional[threading.Event] = None,
    session: Optional[snowpark.Session] = None,
):
    """Configure Snowpark, start the gRPC server and block until it terminates.

    Parameters:
        stop_event: when given, a daemon thread waits on it and stops the
            server once it is set.
        session: Snowpark session to use; one is obtained from
            ``get_or_create_snowpark_session`` when omitted.

    In TCM mode no gRPC server is started and the function returns right
    after the session setup. On failure ``_server_error`` is set,
    ``_server_running`` is set to unblock waiters, and the exception is
    re-raised. Telemetry is shut down in every case.
    """
    global _server_error
    # TODO: factor out the Snowflake connection code.
    server = None
    try:
        config_snowpark()
        if session is not None:
            # If a session is passed in, explicitly call config session to be consistent with sessions created
            # under the hood.
            configure_snowpark_session(session)
        else:
            session = get_or_create_snowpark_session()
        if tcm.TCM_MODE:
            # No need to start grpc server in TCM
            return

        doubled_metadata_size = _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE * 2
        server_options = [
            ("grpc.max_receive_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
            ("grpc.max_metadata_size", _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE),
            ("grpc.absolute_max_metadata_size", doubled_metadata_size),
        ]
        worker_pool = futures.ThreadPoolExecutor(max_workers=10)
        server = grpc.server(worker_pool, options=server_options)

        control_servicer = ControlServicer(session)
        connect_servicer = SnowflakeConnectServicer(
            control_servicer.log_spark_connect_batch
        )
        proto_base_grpc.add_SparkConnectServiceServicer_to_server(
            connect_servicer, server
        )
        control_grpc.add_ControlServiceServicer_to_server(control_servicer, server)

        server_url = get_server_url()
        server.add_insecure_port(server_url)
        logger.info(f"Starting Snowpark Connect server on {server_url}...")
        server.start()
        _server_running.set()
        logger.info("Snowpark Connect server started!")
        telemetry.send_server_started_telemetry()

        if stop_event is not None:
            # start a background thread to listen for stop event and terminate the server
            stopper = threading.Thread(
                target=_stop_server, args=(stop_event, server), daemon=True
            )
            stopper.start()
        server.wait_for_termination()
    except Exception as e:
        _server_error = True
        _server_running.set()  # unblock any client sessions
        bad_connection_name = (
            "Invalid connection_name 'spark-connect', known ones are " in str(e)
        )
        if bad_connection_name:
            logger.error(
                "Ensure 'spark-connect' connection config has been set correctly in connections.toml."
            )
        else:
            logger.error("Error starting up Snowpark Connect server", exc_info=True)
        raise e
    finally:
        # flush the telemetry queue if possible
        telemetry.shutdown()
715
+
716
+
717
+ def _set_remote_url(remote_url: str):
718
+ global _server_url, _client_url
719
+ _client_url = remote_url
720
+ parsed_url = urllib.parse.urlparse(remote_url)
721
+ if parsed_url.scheme == "sc":
722
+ _server_url = parsed_url.netloc
723
+ server_port = parsed_url.port or DEFAULT_PORT
724
+ _check_port_is_free(server_port)
725
+ elif parsed_url.scheme == "unix":
726
+ _server_url = remote_url.split("/;")[0]
727
+ else:
728
+ raise RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")
729
+
730
+
731
def _set_server_tcp_port(server_port: int):
    """Point the server and client URLs at ``server_port``.

    The port is first checked with ``_check_port_is_free``; the server URL
    becomes ``[::]:<port>`` and the client URL ``sc://127.0.0.1:<port>``.
    """
    global _server_url, _client_url
    _check_port_is_free(server_port)
    _server_url, _client_url = (
        f"[::]:{server_port}",
        f"sc://127.0.0.1:{server_port}",
    )
736
+
737
+
738
def _check_port_is_free(port: int) -> None:
    """Raise if something accepts TCP connections on 127.0.0.1:``port``.

    Raises:
        RuntimeError: if a connection attempt (1 second timeout) succeeds.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.settimeout(1)
        # connect_ex returns 0 on success, i.e. a listener already owns the port.
        connect_status = probe.connect_ex(("127.0.0.1", port))
        if connect_status == 0:
            raise RuntimeError(f"TCP port {port} is already in use")
743
+
744
+
745
def _set_server_unix_domain_socket(path: str):
    """Use ``unix:<path>`` as both the server URL and the client URL."""
    global _server_url, _client_url
    _server_url = _client_url = f"unix:{path}"
749
+
750
+
751
def get_server_url() -> str:
    """Return the configured server URL.

    Raises:
        RuntimeError: if no server URL has been set (it is None or empty).
    """
    if _server_url:
        return _server_url
    raise RuntimeError("Server URL not set")
756
+
757
+
758
def get_client_url() -> str:
    """Return the configured client URL.

    Raises:
        RuntimeError: if no client URL has been set (it is None or empty).
    """
    if _client_url:
        return _client_url
    raise RuntimeError("Client URL not set")
763
+
764
+
765
def _make_unix_domain_socket() -> str:
    """Return a socket path inside a freshly created temporary directory.

    Only the directory is created here, not the socket file. Removal of both
    is registered with ``atexit``.
    """
    server_path = os.path.join(tempfile.mkdtemp(), "snowflake_sas_grpc.sock")
    atexit.register(_cleanup_unix_domain_socket, server_path)
    return server_path
770
+
771
+
772
def _cleanup_unix_domain_socket(server_path: str) -> None:
    """Delete the socket file and then its parent directory, when they exist."""
    socket_dir = os.path.dirname(server_path)
    # The file goes first: os.rmdir only removes empty directories.
    for target, remove in ((server_path, os.remove), (socket_dir, os.rmdir)):
        if os.path.exists(target):
            remove(target)
778
+
779
+
780
class UnixDomainSocketChannelBuilder(ChannelBuilder):
    """
    Spark Connect gRPC channel builder for Unix domain sockets.

    The URL is always taken from ``get_client_url()``; the resulting channel
    is an insecure gRPC channel on ``<scheme>:<path>`` of that URL.
    """

    def __init__(self, channelOptions: Optional[List[Tuple[str, Any]]] = None) -> None:
        """Parse the client URL and assemble the gRPC channel options.

        Raises:
            PySparkValueError: if the client URL does not start with
                ``unix:/`` or has nothing after that prefix.
        """
        url: str = get_client_url()
        if not url.startswith("unix:/") or len(url) < 7:
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": "The URL must start with 'unix://'. Please update the URL to follow the correct format, e.g., 'unix://unix_domain_socket_path'.",
                },
            )

        # Rewrite the URL to use http as the scheme so that we can leverage
        # Python's built-in parser to parse URL parameters
        self.url = urllib.parse.urlparse("http://" + url[6:])
        self.params: Dict[str, str] = {}
        self._extract_attributes()

        # Now parse the real unix domain socket URL
        self.url = urllib.parse.urlparse(url)

        default_options = [
            ("grpc.max_send_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
            ("grpc.max_receive_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
            ("grpc.max_metadata_size", _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE),
            (
                "grpc.absolute_max_metadata_size",
                2 * _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE,
            ),
        ]
        self._channel_options = (
            default_options
            if channelOptions is None
            else default_options + channelOptions
        )
        # For Spark 4.0 support, but also backwards compatible.
        self._params = self.params

    def _extract_attributes(self) -> None:
        """Extract attributes from parameters.

        This method was copied from
        https://github.com/apache/spark/blob/branch-3.5/python/pyspark/sql/connect/client/core.py

        This is required for Spark 4.0 support, since it is dropped in favor of moving
        the extraction logic into the constructor.
        """
        raw_params = self.url.params
        if len(raw_params) > 0:
            for p in raw_params.split(";"):
                kv = p.split("=")
                if len(kv) != 2:
                    raise PySparkValueError(
                        error_class="INVALID_CONNECT_URL",
                        message_parameters={
                            "detail": f"Parameter '{p}' should be provided as a "
                            f"key-value pair separated by an equal sign (=). Please update "
                            f"the parameter to follow the correct format, e.g., 'key=value'.",
                        },
                    )
                key, encoded_value = kv
                self.params[key] = urllib.parse.unquote(encoded_value)

        netloc = self.url.netloc.split(":")
        if len(netloc) > 2:
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"Target destination '{self.url.netloc}' should match the "
                    f"'<host>:<port>' pattern. Please update the destination to follow "
                    f"the correct format, e.g., 'hostname:port'.",
                },
            )

        self.host = netloc[0]
        if len(netloc) == 2:
            self.port = int(netloc[1])
            return

        # No explicit port: fall back to the default of the installed pyspark.
        if version.parse(pyspark.__version__) >= version.parse("4.0.0"):
            from pyspark.sql.connect.client.core import DefaultChannelBuilder

            self.port = DefaultChannelBuilder.default_port()
        else:
            self.port = ChannelBuilder.default_port()

    # We override this to enable compatibility with Spark 4.0
    host = None

    @property
    def endpoint(self) -> str:
        """gRPC target built from the parsed URL's scheme and path."""
        parsed = self.url
        return f"{parsed.scheme}:{parsed.path}"

    def toChannel(self) -> grpc.Channel:
        """Open an insecure gRPC channel to ``endpoint`` with the stored options."""
        return grpc.insecure_channel(self.endpoint, options=self._channel_options)
877
+
878
+
879
def config_snowpark() -> None:
    """
    Switch on the Snowpark context flags required by SAS.
    """

    # Enable structType. Require snowpark 1.27.0 or snowpark main branch after commit 888cec55c4
    import snowflake.snowpark.context as context

    for flag in (
        "_use_structured_type_semantics",
        "_is_snowpark_connect_compatible_mode",
    ):
        setattr(context, flag, True)
889
+
890
+
891
def start_jvm():
    """Start the JVM used for the Spark parser and the JDBC drivers.

    JDBC driver .jars come from the CLASSPATH env var; the jars under
    ``includes/jars`` of this package are appended to the classpath. JVM
    options are read from the whitespace-separated ``JAVA_OPTS`` env var.

    Raises:
        RuntimeError: if a JVM is already running outside of TCM mode, since
            its parameters can then no longer be controlled.
    """
    if jpype.isJVMStarted():
        if not tcm.TCM_MODE:
            raise RuntimeError(
                "JVM must not be running when starting the Spark Connect server"
            )
        # No-op if JVM is already started in TCM mode
        return

    package_dir = pathlib.Path(snowflake.snowpark_connect.__file__).parent
    pyspark_jars = package_dir / "includes/jars"

    if "dataframe_processor.zip" in str(pyspark_jars):
        # importlib.resource doesn't work when local stage package is used in TCM
        zip_path = package_dir.parent.parent
        temp_dir = tempfile.gettempdir()

        # Folder to extract (must end with '/')
        extract_folder = "snowflake/snowpark_connect/includes/jars/"

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            jar_members = [
                member
                for member in zip_ref.namelist()
                if member.startswith(extract_folder)
            ]
            for member in jar_members:
                zip_ref.extract(member, path=temp_dir)

        pyspark_jars = pathlib.Path(temp_dir) / extract_folder

    for jar_path in pyspark_jars.glob("**/*.jar"):
        jpype.addClassPath(jar_path)

    # str.split() without arguments never yields empty strings, so the
    # options need no further filtering.
    jvm_settings: list[str] = os.environ.get("JAVA_OPTS", "").split()

    # TODO: Should remove convertStrings, but it breaks the JDBC code.
    jpype.startJVM(
        *jvm_settings,
        convertStrings=True,
    )
944
+
945
+
946
def start_session(
    is_daemon: bool = True,
    remote_url: Optional[str] = None,
    tcp_port: Optional[int] = None,
    unix_domain_socket: Optional[str] = None,
    stop_event: Optional[threading.Event] = None,
    snowpark_session: Optional[snowpark.Session] = None,
    connection_parameters: Optional[Dict[str, str]] = None,
) -> threading.Thread | None:
    """
    Starts Spark Connect server connected to Snowflake. No-op if the Server is already running.

    Parameters:
        is_daemon (bool): Should run the server as daemon or not. use True to automatically shut the Spark connect
            server down when the main program (or test) finishes. use False to start the server in a
            stand-alone, long-running mode.
        remote_url (Optional[str]): sc:// URL on which to start the Spark Connect server. This option is incompatible with the tcp_port
            and unix_domain_socket parameters.
        tcp_port (Optional[int]): TCP port on which to start the Spark Connect server. This option is incompatible with
            the remote_url and unix_domain_socket parameters.
        unix_domain_socket (Optional[str]): Path to the unix domain socket on which to start the Spark Connect server.
            This option is incompatible with the remote_url and tcp_port parameters.
        stop_event (Optional[threading.Event]): Stop the SAS server when stop_event.set() is called.
            Only works when is_daemon=True.
        snowpark_session: A Snowpark session to use for this connection; currently the only applicable use of this is to
            pass in the session created by the stored proc environment.
        connection_parameters: A dictionary of connection parameters to use to create the Snowpark session. If this is
            provided, the `snowpark_session` parameter must be None.

    Returns:
        The server thread when is_daemon is True and the server was started
        by this call; None when the server was already running, or after a
        foreground server has terminated.

    Raises:
        RuntimeError: when run inside a Spark environment, when more than one
            of remote_url / tcp_port / unix_domain_socket is given, or when
            the daemon server fails to start.
        ValueError: when both snowpark_session and connection_parameters are given.
    """
    try:
        if os.environ.get("SPARK_ENV_LOADED"):
            raise RuntimeError(
                "Snowpark Connect cannot be run inside of a Spark environment"
            )
        if connection_parameters is not None and snowpark_session is not None:
            raise ValueError(
                "Only specify one of snowpark_session and connection_parameters"
            )

        global _server_running, _server_error
        if _server_running.is_set():
            url = get_client_url()
            logger.warning(f"Snowpark Connect session is already running at {url}")
            return

        if len(list(filter(None, [remote_url, tcp_port, unix_domain_socket]))) > 1:
            raise RuntimeError(
                "Can only set at most one of remote_url, tcp_port, and unix_domain_socket"
            )

        # Open the Snowflake session only once we know a server will actually
        # be started; doing it earlier left an unused session behind when the
        # server was already running or the arguments were rejected.
        if connection_parameters is not None:
            snowpark_session = snowpark.Session.builder.configs(
                connection_parameters
            ).create()

        url_from_env = os.environ.get("SPARK_REMOTE", None)
        if remote_url:
            _set_remote_url(remote_url)
        elif tcp_port:
            _set_server_tcp_port(tcp_port)
        elif unix_domain_socket:
            _set_server_unix_domain_socket(unix_domain_socket)
        elif url_from_env:
            # Spark clients use environment variable SPARK_REMOTE to figure out Spark Connect URL. If none of the
            # connection properties (remote_url, tcp_port, unix_domain_socket) are explicitly passed in to this
            # function then we should try and mimic clients' behavior
            # i.e. read the server URL from the SPARK_REMOTE environment variable.
            _set_remote_url(url_from_env)
        else:
            # No connection properties can be found at all - either as arguments to this function or in the environment
            # variable. We use random, unique Unix Domain Socket as a last fallback. Client can connect to this randomly
            # generated UDS port using snowpark_connect.get_session().
            # Mostly used in stored procs and Notebooks to avoid port conflicts.
            if os.name == "nt":
                # Windows does not support unix domain sockets, so use default TCP port instead.
                _set_server_tcp_port(DEFAULT_PORT)
            else:
                # Generate unique, random UDS port. Mostly useful in stored proc environment to avoid port conflicts.
                unix_domain_socket = _make_unix_domain_socket()
                _set_server_unix_domain_socket(unix_domain_socket)

        start_jvm()
        _disable_protobuf_recursion_limit()

        if is_daemon:
            arguments = (stop_event, snowpark_session)
            # `daemon=True` ensures the server thread exits when script finishes.
            server_thread = threading.Thread(target=_serve, args=arguments, daemon=True)
            server_thread.start()
            # `_serve` sets the event on success and on failure alike, so the
            # error flag tells the two outcomes apart.
            _server_running.wait()
            if _server_error:
                raise RuntimeError("Snowpark Connect session failed to start")
            return server_thread
        else:
            # Launch in the foreground.
            _serve(session=snowpark_session)
    except Exception as e:
        _reset_server_run_state()
        logger.error(e, exc_info=True)
        raise
1045
+
1046
+
1047
def get_session(url: Optional[str] = None, conf: SparkConf = None) -> SparkSession:
    """
    Returns spark connect session

    Parameters:
        url (Optional[str]): Spark connect server URL. Uses default server URL if none is provided.
        conf (SparkConf): optional configuration whose entries are applied to the session builder.

    Returns:
        A new spark connect session

    Raises:
        RuntimeError: If Spark Connect server is not started.
    """
    try:
        target_url = url or get_client_url()

        if target_url.startswith("unix:/"):
            # NOTE(review): the channel builder reads get_client_url() itself,
            # so an explicitly passed unix URL only selects this branch.
            b = SparkSession.builder.channelBuilder(UnixDomainSocketChannelBuilder())
        else:
            b = SparkSession.builder.remote(target_url)

        conf_entries = conf.getAll() if conf is not None else ()
        for key, value in conf_entries:
            b.config(key, value)

        return b.getOrCreate()
    except Exception as e:
        # Any failure also resets the module-level server state.
        _reset_server_run_state()
        logger.error(e, exc_info=True)
        raise e
1078
+
1079
+
1080
def init_spark_session(conf: SparkConf = None) -> SparkSession:
    """Prepare the Java environment, start the server and return a session.

    ``JAVA_HOME`` comes from ``jdk4py`` when that module is importable and
    from ``CONDA_PREFIX`` otherwise. The server is started with the active
    Snowpark session, and ``conf`` is passed on to ``get_session``.
    """
    try:
        # For Notebooks on SPCS
        from jdk4py import JAVA_HOME

        os.environ["JAVA_HOME"] = str(JAVA_HOME)
    except ModuleNotFoundError:
        # For notebooks on Warehouse
        conda_prefix = os.environ["CONDA_PREFIX"]
        os.environ["JAVA_HOME"] = conda_prefix
        os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(
            conda_prefix, "lib", "server"
        )
    logger.info("JAVA_HOME=%s", os.environ["JAVA_HOME"])

    os.environ.update(
        SPARK_LOCAL_HOSTNAME="127.0.0.1",
        SPARK_CONNECT_MODE_ENABLED="1",
    )

    active_session = snowpark.context.get_active_session()
    start_session(snowpark_session=active_session)
    return get_session(conf=conf)
1100
+
1101
+
1102
def enable_debug_logging():
    """Set the module logger and each of its handlers to DEBUG level."""
    debug_level = logging.DEBUG
    logger.setLevel(debug_level)
    for attached_handler in logger.handlers:
        attached_handler.setLevel(debug_level)
1106
+
1107
+
1108
def _get_files_metadata(data_source: relations_proto.Read.DataSource) -> List[str]:
    """Return ``file://`` URIs for the local files referenced by ``data_source``.

    A path that is a regular file is returned as-is. Any other path is listed
    as a directory and contributes its entries whose name ends with the data
    source format ("text" is mapped to "txt").

    Raises:
        OSError: from ``os.listdir`` when a non-file path cannot be listed.
    """
    # TODO: Handle paths on the cloud
    extension = data_source.format if data_source.format != "text" else "txt"
    files = []
    for path in data_source.paths:
        if os.path.isfile(path):
            files.append(f"file://{path}")
            continue
        # Strip trailing slashes so "dir" and "dir/" produce identical URIs;
        # otherwise "dir/" yields "file://dir//name", which callers that
        # de-duplicate the result treat as a different file.
        directory = path.rstrip("/")
        files.extend(
            f"file://{directory}/{entry}"
            for entry in os.listdir(path)
            if entry.endswith(extension)
        )
    return files
1125
+
1126
+
1127
def _disable_protobuf_recursion_limit():
    """Allow oversize protos in the C++ protobuf implementation.

    Intended to lift the protobuf recursion limit (default 100, see
    https://github.com/protocolbuffers/protobuf/blob/960e79087b332583c80537c949621108a85aa442/src/google/protobuf/io/coded_stream.h#L616)
    because Spark workloads often produce deeply nested execution plans, e.g.:
    - Queries with many unions
    - Complex expressions with multiple levels of nesting
    Without this, legitimate Spark queries would fail with
    `(DecodeError) Error parsing message with type 'spark.connect.Relation'`.
    See test_sql_resulting_in_nested_protobuf.
    """
    from google.protobuf.pyext import cpp_message

    allow_oversize_protos = cpp_message._message.SetAllowOversizeProtos
    allow_oversize_protos(True)