snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879)
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1689 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+ __all__ = [
18
+ "ChannelBuilder",
19
+ "SparkConnectClient",
20
+ "getLogLevel",
21
+ ]
22
+
23
+ from pyspark.sql.connect.utils import check_dependencies
24
+
25
+ check_dependencies(__name__)
26
+
27
+ import threading
28
+ import logging
29
+ import os
30
+ import platform
31
+ import random
32
+ import time
33
+ import urllib.parse
34
+ import uuid
35
+ import sys
36
+ from types import TracebackType
37
+ from typing import (
38
+ Iterable,
39
+ Iterator,
40
+ Optional,
41
+ Any,
42
+ Union,
43
+ List,
44
+ Tuple,
45
+ Dict,
46
+ Set,
47
+ NoReturn,
48
+ cast,
49
+ Callable,
50
+ Generator,
51
+ Type,
52
+ TYPE_CHECKING,
53
+ Sequence,
54
+ )
55
+
56
+ import pandas as pd
57
+ import pyarrow as pa
58
+
59
+ import google.protobuf.message
60
+ from grpc_status import rpc_status
61
+ import grpc
62
+ from google.protobuf import text_format
63
+ from google.rpc import error_details_pb2
64
+
65
+ from pyspark.version import __version__
66
+ from pyspark.resource.information import ResourceInformation
67
+ from pyspark.sql.connect.client.artifact import ArtifactManager
68
+ from pyspark.sql.connect.client.reattach import (
69
+ ExecutePlanResponseReattachableIterator,
70
+ RetryException,
71
+ )
72
+ from pyspark.sql.connect.conversion import storage_level_to_proto, proto_to_storage_level
73
+ import pyspark.sql.connect.proto as pb2
74
+ import pyspark.sql.connect.proto.base_pb2_grpc as grpc_lib
75
+ import pyspark.sql.connect.types as types
76
+ from pyspark.errors.exceptions.connect import (
77
+ convert_exception,
78
+ SparkConnectException,
79
+ SparkConnectGrpcException,
80
+ )
81
+ from pyspark.sql.connect.expressions import (
82
+ PythonUDF,
83
+ CommonInlineUserDefinedFunction,
84
+ JavaUDF,
85
+ )
86
+ from pyspark.sql.connect.plan import (
87
+ CommonInlineUserDefinedTableFunction,
88
+ PythonUDTF,
89
+ )
90
+ from pyspark.sql.connect.utils import get_python_ver
91
+ from pyspark.sql.pandas.types import _create_converter_to_pandas, from_arrow_schema
92
+ from pyspark.sql.types import DataType, StructType, TimestampType, _has_type
93
+ from pyspark.rdd import PythonEvalType
94
+ from pyspark.storagelevel import StorageLevel
95
+ from pyspark.errors import PySparkValueError
96
+
97
+
98
+ if TYPE_CHECKING:
99
+ from pyspark.sql.connect._typing import DataTypeOrString
100
+
101
+
102
def _configure_logging() -> logging.Logger:
    """Create and return the logger used by the Spark Connect client.

    A stream handler with a timestamp / pid / level / function-name format is
    attached to the module logger on every call. The level is taken from the
    ``SPARK_CONNECT_LOG_LEVEL`` environment variable (upper-cased before it is
    applied); when the variable is absent the logger is disabled instead.
    """
    log = logging.getLogger(__name__)

    stream = logging.StreamHandler()
    stream.setFormatter(
        logging.Formatter(fmt="%(asctime)s %(process)d %(levelname)s %(funcName)s %(message)s")
    )
    log.addHandler(stream)

    # The environment decides whether logging is active at all.
    requested_level = os.environ.get("SPARK_CONNECT_LOG_LEVEL")
    if requested_level is None:
        log.disabled = True
    else:
        log.setLevel(requested_level.upper())
    return log
117
+
118
+
119
+ # Instantiate the logger based on the environment configuration.
120
+ logger = _configure_logging()
121
+
122
+
123
def getLogLevel() -> Optional[int]:
    """
    Return the level of the Spark Connect client logger as an integer, or
    ``None`` when that logger is disabled.

    Spark Connect logging can be configured with environment variable 'SPARK_CONNECT_LOG_LEVEL'

    .. versionadded:: 3.5.0
    """
    return None if logger.disabled else logger.level
135
+
136
+
137
class ChannelBuilder:
    """
    This is a helper class that is used to create a GRPC channel based on the given
    connection string per the documentation of Spark Connect.

    The connection string has the shape ``sc://host[:port]/;key=value;key=value``.
    The parameters ``use_ssl``, ``token``, ``user_id``, ``user_agent`` and
    ``session_id`` are consumed by this class; every other parameter is exposed
    through :meth:`metadata`.

    .. versionadded:: 3.4.0

    Examples
    --------
    >>> cb = ChannelBuilder("sc://localhost")
    >>> cb.endpoint
    'localhost:15002'

    >>> cb = ChannelBuilder("sc://localhost/;use_ssl=true;token=aaa")
    >>> cb.secure
    True
    """

    PARAM_USE_SSL = "use_ssl"
    PARAM_TOKEN = "token"
    PARAM_USER_ID = "user_id"
    PARAM_USER_AGENT = "user_agent"
    PARAM_SESSION_ID = "session_id"
    MAX_MESSAGE_LENGTH = 128 * 1024 * 1024

    @staticmethod
    def default_port() -> int:
        """
        Returns
        -------
        The port used when the connection string does not name one: 15002, or, when
        ``SPARK_TESTING`` is set and a local-mode Spark Connect session exists, the
        port reported by the running Spark Connect service.
        """
        if "SPARK_TESTING" in os.environ:
            from pyspark.sql.session import SparkSession as PySparkSession

            # In the case when Spark Connect uses the local mode, it starts the regular Spark
            # session that starts Spark Connect server that sets `SparkSession._instantiatedSession`
            # via SparkSession.__init__.
            #
            # We are getting the actual server port from the Spark session via Py4J to address
            # the case when the server port is set to 0 (in which case an ephemeral port is
            # allocated).
            #
            # This is only used in the test/development mode.
            session = PySparkSession._instantiatedSession

            # 'spark.local.connect' is set when we use the local mode in Spark Connect.
            if session is not None and session.conf.get("spark.local.connect", "0") == "1":

                jvm = PySparkSession._instantiatedSession._jvm  # type: ignore[union-attr]
                return getattr(
                    getattr(
                        jvm.org.apache.spark.sql.connect.service,  # type: ignore[union-attr]
                        "SparkConnectService$",
                    ),
                    "MODULE$",
                ).localPort()
        return 15002

    def __init__(self, url: str, channelOptions: Optional[List[Tuple[str, Any]]] = None) -> None:
        """
        Constructs a new channel builder. This is used to create the proper GRPC channel from
        the connection string.

        Parameters
        ----------
        url : str
            Spark Connect connection string
        channelOptions: list of tuple, optional
            Additional options that can be passed to the GRPC channel construction.
            They are appended after the default message-size options.

        Raises
        ------
        PySparkValueError
            If the URL does not start with ``sc://``, has a non-empty path, or carries
            malformed parameters or a malformed ``host:port`` destination.
        """
        # Explicitly check the scheme of the URL.
        if not url.startswith("sc://"):
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": "The URL must start with 'sc://'. Please update the URL to "
                    "follow the correct format, e.g., 'sc://hostname:port'.",
                },
            )
        # Rewrite the URL to use http as the scheme so that we can leverage
        # Python's built-in parser.
        tmp_url = "http" + url[2:]
        self.url = urllib.parse.urlparse(tmp_url)
        self.params: Dict[str, str] = {}
        if len(self.url.path) > 0 and self.url.path != "/":
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"The path component '{self.url.path}' must be empty. Please update "
                    "the URL to follow the correct format, e.g., 'sc://hostname:port'.",
                },
            )
        self._extract_attributes()

        default_options: List[Tuple[str, Any]] = [
            ("grpc.max_send_message_length", ChannelBuilder.MAX_MESSAGE_LENGTH),
            ("grpc.max_receive_message_length", ChannelBuilder.MAX_MESSAGE_LENGTH),
        ]

        if channelOptions is None:
            self._channel_options = default_options
        else:
            # list() keeps this working when the caller hands in a tuple or another
            # sequence instead of a list.
            self._channel_options = default_options + list(channelOptions)

    def _extract_attributes(self) -> None:
        """
        Populates ``self.params``, ``self.host`` and ``self.port`` from the parsed URL.

        Raises
        ------
        PySparkValueError
            If a parameter is not a single ``key=value`` pair, or the network location
            has more than one ``:`` separator.
        """
        if len(self.url.params) > 0:
            for p in self.url.params.split(";"):
                # Exactly one '=' is accepted per parameter; a literal '=' inside a
                # value therefore has to be percent-encoded in the connection string.
                kv = p.split("=")
                if len(kv) != 2:
                    raise PySparkValueError(
                        error_class="INVALID_CONNECT_URL",
                        message_parameters={
                            "detail": f"Parameter '{p}' should be provided as a "
                            "key-value pair separated by an equal sign (=). Please update "
                            "the parameter to follow the correct format, e.g., 'key=value'.",
                        },
                    )
                self.params[kv[0]] = urllib.parse.unquote(kv[1])

        netloc = self.url.netloc.split(":")
        if len(netloc) == 1:
            self.host = netloc[0]
            self.port = ChannelBuilder.default_port()
        elif len(netloc) == 2:
            self.host = netloc[0]
            self.port = int(netloc[1])
        else:
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"Target destination '{self.url.netloc}' should match the "
                    "'<host>:<port>' pattern. Please update the destination to follow "
                    "the correct format, e.g., 'hostname:port'.",
                },
            )

    def metadata(self) -> Iterable[Tuple[str, str]]:
        """
        Builds the GRPC specific metadata list to be injected into the request. All
        parameters will be converted to metadata except ones that are explicitly used
        by the channel.

        Returns
        -------
        A list of tuples (key, value)
        """
        # Parameters consumed by the builder itself; built once rather than per key.
        reserved = (
            ChannelBuilder.PARAM_TOKEN,
            ChannelBuilder.PARAM_USE_SSL,
            ChannelBuilder.PARAM_USER_ID,
            ChannelBuilder.PARAM_USER_AGENT,
            ChannelBuilder.PARAM_SESSION_ID,
        )
        return [(k, v) for k, v in self.params.items() if k not in reserved]

    @property
    def secure(self) -> bool:
        """
        Returns
        -------
        True when a token is present (a token implies SSL) or when ``use_ssl`` is set
        to ``true`` (case-insensitive); False otherwise.
        """
        if self._token is not None:
            return True

        value = self.params.get(ChannelBuilder.PARAM_USE_SSL, "")
        return value.lower() == "true"

    @property
    def endpoint(self) -> str:
        """
        Returns
        -------
        The ``host:port`` destination of the channel.
        """
        return f"{self.host}:{self.port}"

    @property
    def _token(self) -> Optional[str]:
        return self.params.get(ChannelBuilder.PARAM_TOKEN, None)

    @property
    def userId(self) -> Optional[str]:
        """
        Returns
        -------
        The user_id extracted from the parameters of the connection string or `None` if not
        specified.
        """
        return self.params.get(ChannelBuilder.PARAM_USER_ID, None)

    @property
    def userAgent(self) -> str:
        """
        Returns
        -------
        user_agent : str
            The user_agent parameter specified in the connection string, or the value of
            the ``SPARK_CONNECT_USER_AGENT`` environment variable, or
            "_SPARK_CONNECT_PYTHON" when neither is specified, followed by the Spark
            version, the operating system name and the Python version.

        Raises
        ------
        SparkConnectException
            If the percent-encoded user agent is longer than 2048 characters.
        """
        user_agent = self.params.get(
            ChannelBuilder.PARAM_USER_AGENT,
            os.getenv("SPARK_CONNECT_USER_AGENT", "_SPARK_CONNECT_PYTHON"),
        )
        # The limit applies to the percent-encoded form of the value.
        ua_len = len(urllib.parse.quote(user_agent))
        if ua_len > 2048:
            raise SparkConnectException(
                f"'user_agent' parameter should not exceed 2048 characters, found {ua_len} characters."
            )
        return " ".join(
            [
                user_agent,
                f"spark/{__version__}",
                f"os/{platform.uname().system.lower()}",
                f"python/{platform.python_version()}",
            ]
        )

    def get(self, key: str) -> Any:
        """
        Parameters
        ----------
        key : str
            Parameter key name.

        Returns
        -------
        The parameter value if present, raises exception otherwise.
        """
        return self.params[key]

    @property
    def session_id(self) -> Optional[str]:
        """
        Returns
        -------
        The session_id extracted from the parameters of the connection string or `None` if not
        specified.

        Raises
        ------
        ValueError
            If the value cannot be parsed as a UUID.
        """
        session_id = self.params.get(ChannelBuilder.PARAM_SESSION_ID, None)
        if session_id is not None:
            try:
                uuid.UUID(session_id, version=4)
            except ValueError as ve:
                # Keep the original argument tuple for callers that inspect it, and
                # chain the cause so the traceback shows the parsing failure.
                raise ValueError(
                    "Parameter value 'session_id' must be a valid UUID format.", ve
                ) from ve
        return session_id

    def toChannel(self) -> grpc.Channel:
        """
        Applies the parameters of the connection string and creates a new
        GRPC channel according to the configuration. Passes optional channel options to
        construct the channel.

        Returns
        -------
        GRPC Channel instance.
        """
        destination = self.endpoint

        # `secure` is already True whenever a token is set, so it is the single
        # source of truth for choosing the channel type.
        if not self.secure:
            return grpc.insecure_channel(destination, options=self._channel_options)

        # Default SSL credentials; when a token is present it is attached as
        # per-call access-token credentials on top of them.
        credentials = grpc.ssl_channel_credentials()
        opt_token = self._token
        if opt_token is not None:
            credentials = grpc.composite_channel_credentials(
                credentials, grpc.access_token_call_credentials(opt_token)
            )
        return grpc.secure_channel(
            destination, credentials=credentials, options=self._channel_options
        )
415
+
416
+
417
class MetricValue:
    """A single named metric value together with its metric type string."""

    def __init__(self, name: str, value: Union[int, float], type: str):
        # `type` shadows the builtin, but it is part of the constructor's interface.
        self._name, self._value, self._type = name, value, type

    def __repr__(self) -> str:
        return f"<{self._name}={self._value} ({self._type})>"

    @property
    def name(self) -> str:
        """Name of the metric."""
        return self._name

    @property
    def value(self) -> Union[int, float]:
        """Value of the metric as passed to the constructor."""
        return self._value

    @property
    def metric_type(self) -> str:
        """Metric type string as passed to the constructor (`type` argument)."""
        return self._type
437
+
438
+
439
class PlanMetrics:
    """Metrics of one plan node: its name, id, parent id and metric values."""

    def __init__(self, name: str, id: int, parent: int, metrics: List[MetricValue]):
        # `id` shadows the builtin, but it is part of the constructor's interface.
        self._name, self._id = name, id
        self._parent_id = parent
        self._metrics = metrics

    def __repr__(self) -> str:
        return f"Plan({self._name})={self._metrics}"

    @property
    def name(self) -> str:
        """Name of the plan node."""
        return self._name

    @property
    def plan_id(self) -> int:
        """Identifier given as `id` at construction."""
        return self._id

    @property
    def parent_plan_id(self) -> int:
        """Identifier given as `parent` at construction."""
        return self._parent_id

    @property
    def metrics(self) -> List[MetricValue]:
        """The metric values attached to this plan node."""
        return self._metrics
464
+
465
+
466
class PlanObservedMetrics:
    """A named group of observed metric values (proto literals)."""

    def __init__(self, name: str, metrics: List[pb2.Expression.Literal]):
        self._name, self._metrics = name, metrics

    def __repr__(self) -> str:
        return f"Plan observed({self._name}={self._metrics})"

    @property
    def name(self) -> str:
        """Name under which the metrics were observed."""
        return self._name

    @property
    def metrics(self) -> List[pb2.Expression.Literal]:
        """The observed values as passed to the constructor."""
        return self._metrics
481
+
482
+
483
class AnalyzeResult:
    """
    Holder for the outcome of an analyze call.

    Each attribute corresponds to one possible kind of result; `fromProto`
    fills in at most one of them and leaves the others as `None`.
    """

    def __init__(
        self,
        schema: Optional[DataType],
        explain_string: Optional[str],
        tree_string: Optional[str],
        is_local: Optional[bool],
        is_streaming: Optional[bool],
        input_files: Optional[List[str]],
        spark_version: Optional[str],
        parsed: Optional[DataType],
        is_same_semantics: Optional[bool],
        semantic_hash: Optional[int],
        storage_level: Optional[StorageLevel],
    ):
        self.schema = schema
        self.explain_string = explain_string
        self.tree_string = tree_string
        self.is_local = is_local
        self.is_streaming = is_streaming
        self.input_files = input_files
        self.spark_version = spark_version
        self.parsed = parsed
        self.is_same_semantics = is_same_semantics
        self.semantic_hash = semantic_hash
        self.storage_level = storage_level

    @classmethod
    def fromProto(cls, pb: Any) -> "AnalyzeResult":
        """
        Convert an analyze response message into an `AnalyzeResult`.

        The first populated result field of `pb` (checked in the order below)
        decides which attribute is set. `persist` and `unpersist` responses
        carry no payload, so every attribute stays `None` for them.

        Raises
        ------
        SparkConnectException
            If none of the known result fields is set on `pb`.
        """
        # Start from an all-None result and fill in the single field present.
        result = AnalyzeResult(
            schema=None,
            explain_string=None,
            tree_string=None,
            is_local=None,
            is_streaming=None,
            input_files=None,
            spark_version=None,
            parsed=None,
            is_same_semantics=None,
            semantic_hash=None,
            storage_level=None,
        )

        if pb.HasField("schema"):
            result.schema = types.proto_schema_to_pyspark_data_type(pb.schema.schema)
        elif pb.HasField("explain"):
            result.explain_string = pb.explain.explain_string
        elif pb.HasField("tree_string"):
            result.tree_string = pb.tree_string.tree_string
        elif pb.HasField("is_local"):
            result.is_local = pb.is_local.is_local
        elif pb.HasField("is_streaming"):
            result.is_streaming = pb.is_streaming.is_streaming
        elif pb.HasField("input_files"):
            result.input_files = pb.input_files.files
        elif pb.HasField("spark_version"):
            result.spark_version = pb.spark_version.version
        elif pb.HasField("ddl_parse"):
            result.parsed = types.proto_schema_to_pyspark_data_type(pb.ddl_parse.parsed)
        elif pb.HasField("same_semantics"):
            result.is_same_semantics = pb.same_semantics.result
        elif pb.HasField("semantic_hash"):
            result.semantic_hash = pb.semantic_hash.result
        elif pb.HasField("persist") or pb.HasField("unpersist"):
            # Acknowledgement-only responses: nothing to extract.
            pass
        elif pb.HasField("get_storage_level"):
            result.storage_level = proto_to_storage_level(pb.get_storage_level.storage_level)
        else:
            raise SparkConnectException("No analyze result found!")

        return result
566
+
567
+
568
class ConfigResult:
    """Key/value pairs and warnings returned by a config call."""

    def __init__(self, pairs: List[Tuple[str, Optional[str]]], warnings: List[str]):
        self.pairs = pairs
        self.warnings = warnings

    @classmethod
    def fromProto(cls, pb: pb2.ConfigResponse) -> "ConfigResult":
        """
        Build a `ConfigResult` from a config response message.

        A pair whose `value` field is not set is represented with `None` as
        its value.
        """
        collected = []
        for pair in pb.pairs:
            value = pair.value if pair.HasField("value") else None
            collected.append((pair.key, value))
        return ConfigResult(pairs=collected, warnings=list(pb.warnings))
579
+
580
+
581
+ class SparkConnectClient(object):
582
+ """
583
+ Conceptually the remote spark session that communicates with the server
584
+ """
585
+
586
@classmethod
def retry_exception(cls, e: Exception) -> bool:
    """
    Decide whether a failed call may be retried.

    Parameters
    ----------
    e : Exception
        The error to classify. Typed as Exception because errors raised during
        client processing, not only GRPC errors, can be passed in.

    Returns
    -------
    True for a `grpc.RpcError` whose status is UNAVAILABLE, or whose status is
    INTERNAL and whose text contains "INVALID_CURSOR.DISCONNECTED"; False for
    everything else, including any non-GRPC exception.
    """
    if not isinstance(e, grpc.RpcError):
        return False

    status = e.code()
    if status == grpc.StatusCode.UNAVAILABLE:
        return True
    if status == grpc.StatusCode.INTERNAL:
        # This error happens if another RPC preempts this RPC.
        return "INVALID_CURSOR.DISCONNECTED" in str(e)
    return False
617
+
618
def __init__(
    self,
    connection: Union[str, ChannelBuilder],
    user_id: Optional[str] = None,
    channel_options: Optional[List[Tuple[str, Any]]] = None,
    retry_policy: Optional[Dict[str, Any]] = None,
    use_reattachable_execute: bool = True,
):
    """
    Create a new client for the Spark Connect interface.

    Opens the GRPC channel, creates the service stub and the artifact manager,
    and fixes the session ID and user ID used for all later requests.

    Parameters
    ----------
    connection : str or :class:`ChannelBuilder`
        Connection string that is used to extract the connection parameters and configure
        the GRPC connection. Or instance of ChannelBuilder that creates GRPC connection.
        When a ChannelBuilder instance is passed, `channel_options` is not applied to it.
    user_id : str, optional
        Optional unique user ID that is used to differentiate multiple users and
        isolate their Spark Sessions. If the `user_id` is not set, will default to
        the $USER environment. Defining the user ID as part of the connection string
        takes precedence.
    channel_options: list of tuple, optional
        Additional options that can be passed to the GRPC channel construction.
    retry_policy: dict of str and any, optional
        Overrides for the retry configuration; given keys replace the defaults below.
        * ``max_retries``
            Maximum number of tries. Default: 15
        * ``backoff_multiplier``
            Backoff multiplier for the policy. Default: 4.0
        * ``initial_backoff``
            Backoff to wait before the first retry. Default: 50(ms)
        * ``max_backoff``
            Maximum backoff controls the maximum amount of time to wait before retrying
            a failed request. Default: 60000(ms).
        * ``jitter``
            Default: 500 (presumably ms; semantics are defined by `Retrying` -- confirm there).
        * ``min_jitter_threshold``
            Default: 2000 (presumably ms; semantics are defined by `Retrying` -- confirm there).
    use_reattachable_execute: bool
        Enable reattachable execution.
    """
    self.thread_local = threading.local()

    # Parse the connection string.
    self._builder = (
        connection
        if isinstance(connection, ChannelBuilder)
        else ChannelBuilder(connection, channel_options)
    )
    # Placeholder; the effective user ID is resolved further below.
    self._user_id = None
    self._retry_policy = {
        # Please synchronize changes here with Scala side
        # GrpcRetryHandler.scala
        #
        # Note: the number of retries is selected so that the maximum tolerated wait
        # is guaranteed to be at least 10 minutes
        "max_retries": 15,
        "backoff_multiplier": 4.0,
        "initial_backoff": 50,
        "max_backoff": 60000,
        "jitter": 500,
        "min_jitter_threshold": 2000,
    }
    if retry_policy:
        # Caller-supplied entries override the defaults key by key.
        self._retry_policy.update(retry_policy)

    if self._builder.session_id is None:
        # Generate a unique session ID for this client. This UUID must be unique to allow
        # concurrent Spark sessions of the same user. If the channel is closed, creating
        # a new client will create a new session ID.
        self._session_id = str(uuid.uuid4())
    else:
        # Use the pre-defined session ID.
        self._session_id = str(self._builder.session_id)

    # User ID precedence: connection string, then the `user_id` argument, then $USER.
    if self._builder.userId is not None:
        self._user_id = self._builder.userId
    elif user_id is not None:
        self._user_id = user_id
    else:
        self._user_id = os.getenv("USER", None)

    self._channel = self._builder.toChannel()
    self._closed = False
    self._stub = grpc_lib.SparkConnectServiceStub(self._channel)
    self._artifact_manager = ArtifactManager(
        self._user_id, self._session_id, self._channel, self._builder.metadata()
    )
    self._use_reattachable_execute = use_reattachable_execute
    # NOTE(review): the original trailing comment announced logging configuration for the
    # client, but no such code follows here.
705
+
706
def _retrying(self) -> "Retrying":
    """Create a `Retrying` helper configured with this client's retry policy."""
    policy = self._retry_policy
    return Retrying(can_retry=SparkConnectClient.retry_exception, **policy)  # type: ignore
710
+
711
def disable_reattachable_execute(self) -> "SparkConnectClient":
    """
    Switch reattachable execution off for this client.

    Returns
    -------
    This client, so that calls can be chained.
    """
    self._use_reattachable_execute = False
    return self
714
+
715
def enable_reattachable_execute(self) -> "SparkConnectClient":
    """
    Switch reattachable execution on for this client.

    Returns
    -------
    This client, so that calls can be chained.
    """
    self._use_reattachable_execute = True
    return self
718
+
719
def register_udf(
    self,
    function: Any,
    return_type: "DataTypeOrString",
    name: Optional[str] = None,
    eval_type: int = PythonEvalType.SQL_BATCHED_UDF,
    deterministic: bool = True,
) -> str:
    """
    Register a Python UDF on the server through a `register_function` command.

    When `name` is not given, a name of the form ``fun_<hex uuid>`` is generated.

    Returns
    -------
    The name under which the function was registered.
    """
    udf_name = name if name is not None else f"fun_{uuid.uuid4().hex}"

    # Wrap the Python callable together with the running interpreter's major.minor version.
    interpreter_version = "%d.%d" % sys.version_info[:2]
    python_udf = PythonUDF(
        output_type=return_type,
        eval_type=eval_type,
        func=function,
        python_ver=interpreter_version,
    )

    # Turn it into the inline-UDF plan message.
    inline_udf = CommonInlineUserDefinedFunction(
        function_name=udf_name,
        arguments=[],
        function=python_udf,
        deterministic=deterministic,
    )
    udf_plan = inline_udf.to_plan_udf(self)

    # Ship the registration command; the responses are discarded.
    request = self._execute_plan_request_with_metadata()
    request.plan.command.register_function.CopyFrom(udf_plan)
    self._execute(request)
    return udf_name
757
+
758
def register_udtf(
    self,
    function: Any,
    return_type: "DataTypeOrString",
    name: str,
    eval_type: int = PythonEvalType.SQL_TABLE_UDF,
    deterministic: bool = True,
) -> str:
    """
    Register a user-defined table function (UDTF) on the server through a
    `register_table_function` command. The return type, if specified, must be
    a struct type; it is validated when the proto message for the PythonUDTF
    is built.

    Returns
    -------
    The `name` that was passed in.
    """
    python_udtf = PythonUDTF(
        func=function,
        return_type=return_type,
        eval_type=eval_type,
        python_ver=get_python_ver(),
    )
    inline_udtf = CommonInlineUserDefinedTableFunction(
        function_name=name,
        function=python_udtf,
        deterministic=deterministic,
        arguments=[],
    )
    udtf_plan = inline_udtf.udtf_plan(self)

    request = self._execute_plan_request_with_metadata()
    request.plan.command.register_table_function.CopyFrom(udtf_plan)
    self._execute(request)
    return name
791
+
792
def register_java(
    self,
    name: str,
    javaClassName: str,
    return_type: Optional["DataTypeOrString"] = None,
    aggregate: bool = False,
) -> None:
    """
    Register a Java function on the server through a `register_function` command.

    Parameters
    ----------
    name : str
        Name under which the function is registered.
    javaClassName : str
        Name of the Java class implementing the function.
    return_type : DataType or str, optional
        Output type of the function; omitted from the message when `None`.
    aggregate : bool
        Whether the Java class is an aggregate function. The flag is forwarded
        whether or not `return_type` is given.
    """
    # construct a JavaUDF
    if return_type is None:
        java_udf = JavaUDF(class_name=javaClassName, aggregate=aggregate)
    else:
        # Previously `aggregate` was dropped on this path, so a caller passing both
        # arguments silently lost the aggregate flag.
        java_udf = JavaUDF(
            class_name=javaClassName, output_type=return_type, aggregate=aggregate
        )
    fun = CommonInlineUserDefinedFunction(
        function_name=name,
        function=java_udf,
    ).to_plan_judf(self)
    # construct the request
    req = self._execute_plan_request_with_metadata()
    req.plan.command.register_function.CopyFrom(fun)

    self._execute(req)
813
+
814
def _build_metrics(self, metrics: "pb2.ExecutePlanResponse.Metrics") -> Iterator[PlanMetrics]:
    """Lazily convert each entry of `metrics.metrics` into a `PlanMetrics` object."""

    def convert(node: Any) -> PlanMetrics:
        values = [
            MetricValue(key, metric.value, metric.metric_type)
            for key, metric in node.execution_metrics.items()
        ]
        return PlanMetrics(node.name, node.plan_id, node.parent, values)

    return (convert(node) for node in metrics.metrics)
824
+
825
def _resources(self) -> Dict[str, ResourceInformation]:
    """Run a get-resources command and return its result from the response properties."""
    logger.info("Fetching the resources")
    command = pb2.Command()
    # The command carries no fields; SetInParent marks it as present.
    command.get_resources_command.SetInParent()
    _, properties = self.execute_command(command)
    return properties["get_resources_command_result"]
832
+
833
def _build_observed_metrics(
    self, metrics: Sequence["pb2.ExecutePlanResponse.ObservedMetrics"]
) -> Iterator[PlanObservedMetrics]:
    """Lazily wrap each observed-metrics message as a `PlanObservedMetrics`."""
    return (PlanObservedMetrics(entry.name, list(entry.values)) for entry in metrics)
837
+
838
def to_table_as_iterator(self, plan: pb2.Plan) -> Iterator[Union[StructType, "pa.Table"]]:
    """
    Execute the plan and stream its results.

    Yields each received schema as-is and each received record batch wrapped
    in a single-batch PyArrow Table; other response items are not yielded.
    """
    logger.info(f"Executing plan {self._proto_to_string(plan)}")
    request = self._execute_plan_request_with_metadata()
    request.plan.CopyFrom(plan)
    for item in self._execute_and_fetch_as_iterator(request):
        if isinstance(item, pa.RecordBatch):
            yield pa.Table.from_batches([item])
        elif isinstance(item, StructType):
            yield item
850
+
851
def to_table(self, plan: pb2.Plan) -> Tuple["pa.Table", Optional[StructType]]:
    """
    Execute the plan and return all of its rows as one PyArrow Table, together
    with the schema reported by the server (if any).
    """
    logger.info(f"Executing plan {self._proto_to_string(plan)}")
    request = self._execute_plan_request_with_metadata()
    request.plan.CopyFrom(plan)
    table, schema, _metrics, _observed, _properties = self._execute_and_fetch(request)
    assert table is not None
    return table, schema
861
+
862
def to_pandas(self, plan: pb2.Plan) -> "pd.DataFrame":
    """
    Return given plan as a pandas DataFrame.

    Executes the plan, converts the resulting Arrow table to pandas and then
    runs a per-column converter driven by the Spark schema. Execution metrics
    and observed metrics, when any were received, are attached to the result
    under ``pdf.attrs["metrics"]`` and ``pdf.attrs["observed_metrics"]``.

    Several session configs are read from the server along the way (self
    destruct, session time zone, struct handling mode), each via a config call.
    """
    logger.info(f"Executing plan {self._proto_to_string(plan)}")
    req = self._execute_plan_request_with_metadata()
    req.plan.CopyFrom(plan)
    # Ask the server whether the memory-saving "self destruct" conversion is enabled.
    (self_destruct_conf,) = self.get_config_with_defaults(
        ("spark.sql.execution.arrow.pyspark.selfDestruct.enabled", "false"),
    )
    self_destruct = cast(str, self_destruct_conf).lower() == "true"
    table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
        req, self_destruct=self_destruct
    )
    assert table is not None

    # Fall back to a schema derived from the Arrow table when none was received.
    schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True)
    assert schema is not None and isinstance(schema, StructType)

    # Rename columns to avoid duplicated column names.
    renamed_table = table.rename_columns([f"col_{i}" for i in range(table.num_columns)])
    if self_destruct:
        # Configure PyArrow to use as little memory as possible:
        # self_destruct - free columns as they are converted
        # split_blocks - create a separate Pandas block for each column
        # use_threads - convert one column at a time
        pandas_options = {
            "self_destruct": True,
            "split_blocks": True,
            "use_threads": False,
        }
        pdf = renamed_table.to_pandas(**pandas_options)
    else:
        pdf = renamed_table.to_pandas()
    # Restore the real column names from the Spark schema.
    pdf.columns = schema.names

    if len(pdf.columns) > 0:
        # The session time zone is only fetched when a timestamp column is present.
        timezone: Optional[str] = None
        if any(_has_type(f.dataType, TimestampType) for f in schema.fields):
            (timezone,) = self.get_configs("spark.sql.session.timeZone")

        # The struct handling mode is only fetched when a struct column is present.
        struct_in_pandas: Optional[str] = None
        error_on_duplicated_field_names: bool = False
        if any(_has_type(f.dataType, StructType) for f in schema.fields):
            (struct_in_pandas,) = self.get_config_with_defaults(
                ("spark.sql.execution.pandas.structHandlingMode", "legacy"),
            )

            # "legacy" is translated to "dict" plus an error on duplicated field names.
            if struct_in_pandas == "legacy":
                error_on_duplicated_field_names = True
                struct_in_pandas = "dict"

        # Convert column by column and reassemble the frame.
        # NOTE(review): `pa_field` is not used by the converter call below.
        pdf = pd.concat(
            [
                _create_converter_to_pandas(
                    field.dataType,
                    field.nullable,
                    timezone=timezone,
                    struct_in_pandas=struct_in_pandas,
                    error_on_duplicated_field_names=error_on_duplicated_field_names,
                )(pser)
                for (_, pser), field, pa_field in zip(pdf.items(), schema.fields, table.schema)
            ],
            axis="columns",
        )

    if len(metrics) > 0:
        pdf.attrs["metrics"] = metrics
    if len(observed_metrics) > 0:
        pdf.attrs["observed_metrics"] = observed_metrics
    return pdf
933
+
934
def _proto_to_string(self, p: google.protobuf.message.Message) -> str:
    """
    Render a proto message as a single-line string.

    Parameters
    ----------
    p : google.protobuf.message.Message
        Any proto message, e.g. a plan or a command.

    Returns
    -------
    The text-format serialization of `p` on one line.
    """
    rendered = text_format.MessageToString(p, as_one_line=True)
    return rendered
948
+
949
def schema(self, plan: pb2.Plan) -> StructType:
    """
    Ask the server for the schema of the given plan via the analyze call.
    """
    logger.info(f"Schema for plan: {self._proto_to_string(plan)}")
    analysis = self._analyze(method="schema", plan=plan)
    plan_schema = analysis.schema
    # Server side should populate the struct field which is the schema.
    assert plan_schema is not None
    assert isinstance(plan_schema, StructType)
    return plan_schema
959
+
960
def explain_string(self, plan: pb2.Plan, explain_mode: str = "extended") -> str:
    """
    Ask the server for the explain output of the given plan in the requested mode.
    """
    logger.info(f"Explain (mode={explain_mode}) for plan {self._proto_to_string(plan)}")
    analysis = self._analyze(method="explain", plan=plan, explain_mode=explain_mode)
    explained = analysis.explain_string
    assert explained is not None
    return explained
970
+
971
def execute_command(
    self, command: pb2.Command
) -> Tuple[Optional[pd.DataFrame], Dict[str, Any]]:
    """
    Execute the given command on the server.

    Returns
    -------
    A pair of (data, properties): the returned rows converted to pandas, or
    `None` when the response carried no rows, and the dict of command results
    collected from the response.
    """
    logger.info(f"Execute command for command {self._proto_to_string(command)}")
    request = self._execute_plan_request_with_metadata()
    user_id = self._user_id
    if user_id:
        request.user_context.user_id = user_id
    request.plan.command.CopyFrom(command)

    data, _schema, _metrics, _observed, properties = self._execute_and_fetch(request)
    frame = None if data is None else data.to_pandas()
    return (frame, properties)
987
+
988
def same_semantics(self, plan: pb2.Plan, other: pb2.Plan) -> bool:
    """
    Ask the server whether the two plans have the same semantics.
    """
    analysis = self._analyze(method="same_semantics", plan=plan, other=other)
    verdict = analysis.is_same_semantics
    assert verdict is not None
    return verdict
995
+
996
def semantic_hash(self, plan: pb2.Plan) -> int:
    """
    Ask the server for the `hashCode` of the logical query plan.
    """
    analysis = self._analyze(method="semantic_hash", plan=plan)
    hash_code = analysis.semantic_hash
    assert hash_code is not None
    return hash_code
1003
+
1004
def close(self) -> None:
    """
    Shut down the reattachable-iterator machinery, close the GRPC channel and
    mark this client as closed.
    """
    # Class-level shutdown first, then this client's own channel.
    ExecutePlanResponseReattachableIterator.shutdown()
    channel = self._channel
    channel.close()
    self._closed = True
1011
+
1012
@property
def is_closed(self) -> bool:
    """
    Whether `close()` has already been called on this client.
    """
    closed = self._closed
    return closed
1018
+
1019
@property
def host(self) -> str:
    """
    Hostname this client connects to, as reported by the channel builder.
    """
    builder = self._builder
    return builder.host
1025
+
1026
@property
def token(self) -> Optional[str]:
    """
    Bearer token held by the channel builder for authentication.
    `None` is returned when no bearer token is in use.
    """
    builder = self._builder
    return builder._token
1033
+
1034
def _execute_plan_request_with_metadata(self) -> pb2.ExecutePlanRequest:
    """
    Create an ExecutePlanRequest pre-filled with the session ID, the client
    type (user agent), the current tags and, when set, the user ID.
    """
    request = pb2.ExecutePlanRequest(
        session_id=self._session_id,
        client_type=self._builder.userAgent,
        tags=list(self.get_tags()),
    )
    user_id = self._user_id
    if user_id:
        request.user_context.user_id = user_id
    return request
1043
+
1044
def _analyze_plan_request_with_metadata(self) -> pb2.AnalyzePlanRequest:
    """
    Create an AnalyzePlanRequest pre-filled with the session ID, the client
    type (user agent) and, when set, the user ID.
    """
    request = pb2.AnalyzePlanRequest()
    request.session_id = self._session_id
    request.client_type = self._builder.userAgent
    user_id = self._user_id
    if user_id:
        request.user_context.user_id = user_id
    return request
1051
+
1052
def _analyze(self, method: str, **kwargs: Any) -> AnalyzeResult:
    """
    Call the analyze RPC of Spark Connect.

    Parameters
    ----------
    method : str
        Kind of analysis to request. One of "schema", "explain", "tree_string",
        "is_local", "is_streaming", "input_files", "spark_version", "ddl_parse",
        "same_semantics", "semantic_hash", "persist", "unpersist" and
        "get_storage_level".
    **kwargs
        Method-specific inputs read below: `plan`, `other`, `explain_mode`,
        `level`, `ddl_string`, `relation`, `storage_level`, `blocking`.

    Returns
    -------
    The result of the analyze call.

    Raises
    ------
    PySparkValueError
        For an unknown `method` or an unknown `explain_mode`. These are raised
        before the RPC is attempted.

    Errors raised while performing the RPC (including a session ID mismatch in
    the response) are handed to `self._handle_error`.
    """
    req = self._analyze_plan_request_with_metadata()
    if method == "schema":
        req.schema.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "explain":
        req.explain.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
        explain_mode = kwargs.get("explain_mode")
        # Single table for both validation and translation of the mode name.
        mode_enum = pb2.AnalyzePlanRequest.Explain.ExplainMode
        proto_modes = {
            "simple": mode_enum.EXPLAIN_MODE_SIMPLE,
            "extended": mode_enum.EXPLAIN_MODE_EXTENDED,
            "codegen": mode_enum.EXPLAIN_MODE_CODEGEN,
            "cost": mode_enum.EXPLAIN_MODE_COST,
            "formatted": mode_enum.EXPLAIN_MODE_FORMATTED,
        }
        # The isinstance guard keeps unhashable values on the "unknown mode" path.
        if not isinstance(explain_mode, str) or explain_mode not in proto_modes:
            raise PySparkValueError(
                error_class="UNKNOWN_EXPLAIN_MODE",
                message_parameters={
                    "explain_mode": str(explain_mode),
                },
            )
        req.explain.explain_mode = proto_modes[explain_mode]
    elif method == "tree_string":
        req.tree_string.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
        level = kwargs.get("level")
        # A missing, zero or non-int level leaves the field unset.
        if level and isinstance(level, int):
            req.tree_string.level = level
    elif method == "is_local":
        req.is_local.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "is_streaming":
        req.is_streaming.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "input_files":
        req.input_files.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "spark_version":
        req.spark_version.SetInParent()
    elif method == "ddl_parse":
        req.ddl_parse.ddl_string = cast(str, kwargs.get("ddl_string"))
    elif method == "same_semantics":
        req.same_semantics.target_plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
        req.same_semantics.other_plan.CopyFrom(cast(pb2.Plan, kwargs.get("other")))
    elif method == "semantic_hash":
        req.semantic_hash.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "persist":
        req.persist.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation")))
        if kwargs.get("storage_level", None) is not None:
            storage_level = cast(StorageLevel, kwargs.get("storage_level"))
            req.persist.storage_level.CopyFrom(storage_level_to_proto(storage_level))
    elif method == "unpersist":
        req.unpersist.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation")))
        if kwargs.get("blocking", None) is not None:
            req.unpersist.blocking = cast(bool, kwargs.get("blocking"))
    elif method == "get_storage_level":
        req.get_storage_level.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation")))
    else:
        raise PySparkValueError(
            error_class="UNSUPPORTED_OPERATION",
            message_parameters={
                "operation": method,
            },
        )

    try:
        for attempt in self._retrying():
            with attempt:
                resp = self._stub.AnalyzePlan(req, metadata=self._builder.metadata())
                if resp.session_id != self._session_id:
                    # Trailing space added so the IDs do not run into "request:".
                    raise SparkConnectException(
                        "Received incorrect session identifier for request: "
                        f"{resp.session_id} != {self._session_id}"
                    )
                return AnalyzeResult.fromProto(resp)
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
1145
+
1146
def _execute(self, req: pb2.ExecutePlanRequest) -> None:
    """
    Run the request `req` and discard every response, only verifying that each
    response belongs to this client's session.

    Parameters
    ----------
    req : pb2.ExecutePlanRequest
        Proto representation of the plan.

    Any exception raised while executing is handed to `self._handle_error`.
    """
    logger.info("Execute")

    def check_session(response: pb2.ExecutePlanResponse) -> None:
        if response.session_id != self._session_id:
            raise SparkConnectException(
                "Received incorrect session identifier for request: "
                f"{response.session_id} != {self._session_id}"
            )

    try:
        if not self._use_reattachable_execute:
            for attempt in self._retrying():
                with attempt:
                    stream = self._stub.ExecutePlan(req, metadata=self._builder.metadata())
                    for response in stream:
                        check_session(response)
        else:
            # Don't use retryHandler - own retry handling is inside.
            responses = ExecutePlanResponseReattachableIterator(
                req, self._stub, self._retry_policy, self._builder.metadata()
            )
            for response in responses:
                check_session(response)
    except Exception as error:
        self._handle_error(error)
1180
+
1181
def _execute_and_fetch_as_iterator(
    self, req: pb2.ExecutePlanRequest
) -> Iterator[
    Union[
        "pa.RecordBatch",
        StructType,
        PlanMetrics,
        PlanObservedMetrics,
        Dict[str, Any],
    ]
]:
    """
    Execute `req` and lazily yield the decoded contents of every response.

    Depending on which fields a response carries, the generator yields plan
    metrics, observed metrics, the result schema (as a StructType), Arrow
    record batches, or a single-entry dict keyed by the name of a command
    result. One response may produce several items.

    Any exception raised while executing is handed to `self._handle_error`.
    """
    logger.info("ExecuteAndFetchAsIterator")

    def handle_response(
        b: pb2.ExecutePlanResponse,
    ) -> Iterator[
        Union[
            "pa.RecordBatch",
            StructType,
            PlanMetrics,
            PlanObservedMetrics,
            Dict[str, Any],
        ]
    ]:
        # Decode one response; the checks are independent `if`s, not an elif chain.
        if b.session_id != self._session_id:
            raise SparkConnectException(
                "Received incorrect session identifier for request: "
                f"{b.session_id} != {self._session_id}"
            )
        if b.HasField("metrics"):
            logger.debug("Received metric batch.")
            yield from self._build_metrics(b.metrics)
        if b.observed_metrics:
            logger.debug("Received observed metric batch.")
            yield from self._build_observed_metrics(b.observed_metrics)
        if b.HasField("schema"):
            logger.debug("Received the schema.")
            dt = types.proto_schema_to_pyspark_data_type(b.schema)
            assert isinstance(dt, StructType)
            yield dt
        # Command results are passed on as one-key dicts keyed by the field name.
        if b.HasField("sql_command_result"):
            logger.debug("Received the SQL command result.")
            yield {"sql_command_result": b.sql_command_result.relation}
        if b.HasField("write_stream_operation_start_result"):
            field = "write_stream_operation_start_result"
            yield {field: b.write_stream_operation_start_result}
        if b.HasField("streaming_query_command_result"):
            yield {"streaming_query_command_result": b.streaming_query_command_result}
        if b.HasField("streaming_query_manager_command_result"):
            cmd_result = b.streaming_query_manager_command_result
            yield {"streaming_query_manager_command_result": cmd_result}
        if b.HasField("get_resources_command_result"):
            # Convert each proto resource into a ResourceInformation.
            resources = {}
            for key, resource in b.get_resources_command_result.resources.items():
                name = resource.name
                addresses = [address for address in resource.addresses]
                resources[key] = ResourceInformation(name, addresses)
            yield {"get_resources_command_result": resources}
        if b.HasField("arrow_batch"):
            logger.debug(
                f"Received arrow batch rows={b.arrow_batch.row_count} "
                f"size={len(b.arrow_batch.data)}"
            )

            # The payload is an Arrow IPC stream; yield every record batch in it.
            with pa.ipc.open_stream(b.arrow_batch.data) as reader:
                for batch in reader:
                    assert isinstance(batch, pa.RecordBatch)
                    yield batch

    try:
        if self._use_reattachable_execute:
            # Don't use retryHandler - own retry handling is inside.
            generator = ExecutePlanResponseReattachableIterator(
                req, self._stub, self._retry_policy, self._builder.metadata()
            )
            for b in generator:
                yield from handle_response(b)
        else:
            # NOTE(review): items are yielded from inside the retry attempt; what happens
            # to already-yielded items on a retry depends on `Retrying` -- confirm there.
            for attempt in self._retrying():
                with attempt:
                    for b in self._stub.ExecutePlan(req, metadata=self._builder.metadata()):
                        yield from handle_response(b)
    except Exception as error:
        self._handle_error(error)
1265
+
1266
def _execute_and_fetch(
    self, req: pb2.ExecutePlanRequest, self_destruct: bool = False
) -> Tuple[
    Optional["pa.Table"],
    Optional[StructType],
    List[PlanMetrics],
    List[PlanObservedMetrics],
    Dict[str, Any],
]:
    """
    Execute `req`, consume the whole response stream and collect its contents.

    Returns
    -------
    A 5-tuple of (table, schema, metrics, observed_metrics, properties).
    `table` is `None` when no record batch was received; `schema` is the last
    schema received or `None`; `properties` merges all received result dicts.
    With `self_destruct=True` every column of every batch is copied into its
    own allocation before the table is assembled.
    """
    logger.info("ExecuteAndFetch")

    schema: Optional[StructType] = None
    batches: List[pa.RecordBatch] = []
    metrics: List[PlanMetrics] = []
    observed_metrics: List[PlanObservedMetrics] = []
    properties: Dict[str, Any] = {}

    # Sort every streamed item into its bucket by type.
    for item in self._execute_and_fetch_as_iterator(req):
        if isinstance(item, StructType):
            schema = item
        elif isinstance(item, pa.RecordBatch):
            batches.append(item)
        elif isinstance(item, PlanMetrics):
            metrics.append(item)
        elif isinstance(item, PlanObservedMetrics):
            observed_metrics.append(item)
        elif isinstance(item, dict):
            properties.update(**item)
        else:
            raise PySparkValueError(
                error_class="UNKNOWN_RESPONSE",
                message_parameters={
                    "response": item,
                },
            )

    if len(batches) == 0:
        return None, schema, metrics, observed_metrics, properties

    if not self_destruct:
        table = pa.Table.from_batches(batches=batches)
        return table, schema, metrics, observed_metrics, properties

    results = []
    for batch in batches:
        # self_destruct frees memory column-wise, but Arrow record batches are
        # oriented row-wise, so copies each column into its own allocation
        batch = pa.RecordBatch.from_arrays(
            [
                # This call actually reallocates the array
                pa.concat_arrays([array])
                for array in batch
            ],
            schema=batch.schema,
        )
        results.append(batch)
    table = pa.Table.from_batches(batches=results)
    # Ensure only the table has a reference to the batches, so that
    # self_destruct (if enabled) is effective
    del results
    del batches
    return table, schema, metrics, observed_metrics, properties
1327
+
1328
def _config_request_with_metadata(self) -> pb2.ConfigRequest:
    """
    Create a ConfigRequest pre-filled with the session ID, the client type
    (user agent) and, when set, the user ID.
    """
    request = pb2.ConfigRequest()
    request.session_id = self._session_id
    request.client_type = self._builder.userAgent
    user_id = self._user_id
    if user_id:
        request.user_context.user_id = user_id
    return request
1335
+
1336
def get_configs(self, *keys: str) -> Tuple[Optional[str], ...]:
    """
    Fetch the values of the given config keys through the config RPC.

    Parameters
    ----------
    keys : str
        Names of the configs to look up.

    Returns
    -------
    One entry per requested key, in the order given; ``None`` for any key that
    is missing from the returned pairs.
    """
    get_operation = pb2.ConfigRequest.Get(keys=keys)
    result = self.config(pb2.ConfigRequest.Operation(get=get_operation))
    lookup = dict(result.pairs)
    return tuple(lookup.get(name) for name in keys)
1340
+
1341
def get_config_with_defaults(
    self, *pairs: Tuple[str, Optional[str]]
) -> Tuple[Optional[str], ...]:
    """
    Fetch config values through the config RPC, sending a default with each key.

    Parameters
    ----------
    pairs : tuple of (str, optional str)
        ``(key, default)`` tuples; each becomes one ``KeyValue`` in the request.

    Returns
    -------
    One entry per requested key, in the order given; ``None`` for any key that
    is missing from the returned pairs.
    """
    key_values = [pb2.KeyValue(key=name, value=fallback) for name, fallback in pairs]
    with_defaults = pb2.ConfigRequest.GetWithDefault(pairs=key_values)
    result = self.config(pb2.ConfigRequest.Operation(get_with_default=with_defaults))
    lookup = dict(result.pairs)
    return tuple(lookup.get(name) for name, _ in pairs)
1351
+
1352
def config(self, operation: pb2.ConfigRequest.Operation) -> ConfigResult:
    """
    Call the config RPC of Spark Connect.

    Parameters
    ----------
    operation : pb2.ConfigRequest.Operation
        The operation to perform; it is copied into the outgoing request.

    Returns
    -------
    The result of the config call, built with ``ConfigResult.fromProto``.
    """
    request = self._config_request_with_metadata()
    request.operation.CopyFrom(operation)
    try:
        for attempt in self._retrying():
            # The session check stays inside the attempt so that the attempt's
            # context manager sees the exception raised on a mismatch.
            with attempt:
                reply = self._stub.Config(request, metadata=self._builder.metadata())
                if reply.session_id == self._session_id:
                    return ConfigResult.fromProto(reply)
                raise SparkConnectException(
                    "Received incorrect session identifier for request:"
                    f"{reply.session_id} != {self._session_id}"
                )
        # Only reached when the retry loop ends without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
1380
+
1381
def _interrupt_request(
    self, interrupt_type: str, id_or_tag: Optional[str] = None
) -> pb2.InterruptRequest:
    """
    Build an ``InterruptRequest`` for the given kind of interrupt.

    Parameters
    ----------
    interrupt_type : str
        One of ``"all"``, ``"tag"`` or ``"operation"``.
    id_or_tag : str, optional
        The operation tag (for ``"tag"``) or the operation id (for
        ``"operation"``); asserted to be non-``None`` for those two kinds.

    Returns
    -------
    The populated request, including the user id when the client has a truthy one.

    Raises
    ------
    PySparkValueError
        If ``interrupt_type`` is none of the supported values.
    """
    request = pb2.InterruptRequest()
    request.session_id = self._session_id
    request.client_type = self._builder.userAgent

    kinds = pb2.InterruptRequest.InterruptType
    if interrupt_type == "all":
        request.interrupt_type = kinds.INTERRUPT_TYPE_ALL
    elif interrupt_type == "tag":
        assert id_or_tag is not None
        request.interrupt_type = kinds.INTERRUPT_TYPE_TAG
        request.operation_tag = id_or_tag
    elif interrupt_type == "operation":
        assert id_or_tag is not None
        request.interrupt_type = kinds.INTERRUPT_TYPE_OPERATION_ID
        request.operation_id = id_or_tag
    else:
        raise PySparkValueError(
            error_class="UNKNOWN_INTERRUPT_TYPE",
            message_parameters={"interrupt_type": str(interrupt_type)},
        )

    if self._user_id:
        request.user_context.user_id = self._user_id
    return request
1407
+
1408
def interrupt_all(self) -> Optional[List[str]]:
    """
    Send an interrupt request of type ``"all"`` through the Interrupt RPC.

    Returns
    -------
    The ``interrupted_ids`` of the reply, as a list.
    """
    request = self._interrupt_request("all")
    try:
        for attempt in self._retrying():
            # The session check stays inside the attempt so that the attempt's
            # context manager sees the exception raised on a mismatch.
            with attempt:
                reply = self._stub.Interrupt(request, metadata=self._builder.metadata())
                if reply.session_id == self._session_id:
                    return list(reply.interrupted_ids)
                raise SparkConnectException(
                    "Received incorrect session identifier for request:"
                    f"{reply.session_id} != {self._session_id}"
                )
        # Only reached when the retry loop ends without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
1423
+
1424
def interrupt_tag(self, tag: str) -> Optional[List[str]]:
    """
    Send an interrupt request of type ``"tag"`` through the Interrupt RPC.

    Parameters
    ----------
    tag : str
        The operation tag placed in the request.

    Returns
    -------
    The ``interrupted_ids`` of the reply, as a list.
    """
    request = self._interrupt_request("tag", tag)
    try:
        for attempt in self._retrying():
            # The session check stays inside the attempt so that the attempt's
            # context manager sees the exception raised on a mismatch.
            with attempt:
                reply = self._stub.Interrupt(request, metadata=self._builder.metadata())
                if reply.session_id == self._session_id:
                    return list(reply.interrupted_ids)
                raise SparkConnectException(
                    "Received incorrect session identifier for request:"
                    f"{reply.session_id} != {self._session_id}"
                )
        # Only reached when the retry loop ends without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
1439
+
1440
def interrupt_operation(self, op_id: str) -> Optional[List[str]]:
    """
    Send an interrupt request of type ``"operation"`` through the Interrupt RPC.

    Parameters
    ----------
    op_id : str
        The operation id placed in the request.

    Returns
    -------
    The ``interrupted_ids`` of the reply, as a list.
    """
    request = self._interrupt_request("operation", op_id)
    try:
        for attempt in self._retrying():
            # The session check stays inside the attempt so that the attempt's
            # context manager sees the exception raised on a mismatch.
            with attempt:
                reply = self._stub.Interrupt(request, metadata=self._builder.metadata())
                if reply.session_id == self._session_id:
                    return list(reply.interrupted_ids)
                raise SparkConnectException(
                    "Received incorrect session identifier for request:"
                    f"{reply.session_id} != {self._session_id}"
                )
        # Only reached when the retry loop ends without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
1455
+
1456
def add_tag(self, tag: str) -> None:
    """
    Validate ``tag`` and add it to the tag set stored on ``self.thread_local``.

    The set is created on first use.
    """
    self._throw_if_invalid_tag(tag)
    store = self.thread_local
    if hasattr(store, "tags"):
        store.tags.add(tag)
    else:
        store.tags = {tag}
1461
+
1462
def remove_tag(self, tag: str) -> None:
    """
    Validate ``tag`` and remove it from the tag set stored on ``self.thread_local``.

    Removing a tag that was never added is a no-op; the set is created on
    first use.

    Parameters
    ----------
    tag : str
        The tag to remove.
    """
    self._throw_if_invalid_tag(tag)
    if not hasattr(self.thread_local, "tags"):
        self.thread_local.tags = set()
    # discard() rather than remove(): an absent tag must not raise KeyError.
    self.thread_local.tags.discard(tag)
1467
+
1468
def get_tags(self) -> Set[str]:
    """
    Return the tag set stored on ``self.thread_local``, creating an empty one
    first if none exists yet.

    The stored set itself is returned, not a copy.
    """
    store = self.thread_local
    if hasattr(store, "tags"):
        return store.tags
    store.tags = set()
    return store.tags
1472
+
1473
def clear_tags(self) -> None:
    """Replace the tag set stored on ``self.thread_local`` with a new empty set."""
    store = self.thread_local
    store.tags = set()
1475
+
1476
def _throw_if_invalid_tag(self, tag: str) -> None:
    """
    Check that ``tag`` is usable as an ExecutePlanRequest tag.

    Raises
    ------
    ValueError
        If ``tag`` is ``None``, contains a comma, or is empty.
    """
    separator = ","
    if tag is None:
        raise ValueError("Spark Connect tag cannot be null.")
    if separator in tag:
        raise ValueError(f"Spark Connect tag cannot contain '{separator}'.")
    if not len(tag):
        raise ValueError("Spark Connect tag cannot be an empty string.")
1488
+
1489
def _handle_error(self, error: Exception) -> NoReturn:
    """
    Handle errors that occur during RPC calls.

    Parameters
    ----------
    error : Exception
        An exception thrown during RPC calls.

    Returns
    -------
    Never returns normally. gRPC errors are passed to ``_handle_rpc_error``; a
    ``ValueError`` whose text contains both "Cannot invoke RPC" and "closed"
    becomes a ``SparkConnectException`` with error class ``NO_ACTIVE_SESSION``;
    anything else is re-raised as is.
    """
    if isinstance(error, grpc.RpcError):
        self._handle_rpc_error(error)
    elif isinstance(error, ValueError):
        text = str(error)
        if "Cannot invoke RPC" in text and "closed" in text:
            raise SparkConnectException(
                error_class="NO_ACTIVE_SESSION", message_parameters={}
            ) from None
    raise error
1510
+
1511
def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn:
    """
    Turn a GRPC error into the matching Python exception.

    The status attached to the call is inspected: when one of its details is an
    ``ErrorInfo`` it is unpacked and converted with ``convert_exception``;
    otherwise a ``SparkConnectGrpcException`` is raised with the status message,
    or with the string form of the error when no status is available.

    Every exception is raised ``from None`` so the GRPC error context is not
    shown to the user; the error is logged through ``logger.exception`` first.

    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.

    Returns
    -------
    Never returns normally; always raises.
    """
    logger.exception("GRPC Error received")
    # A RpcError is a Call as well, hence the cast.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if not status:
        raise SparkConnectGrpcException(str(rpc_error)) from None

    for detail in status.details:
        if detail.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
            error_info = error_details_pb2.ErrorInfo()
            detail.Unpack(error_info)
            raise convert_exception(error_info, status.message) from None

    raise SparkConnectGrpcException(status.message) from None
1544
+
1545
def add_artifacts(self, *path: str, pyfile: bool, archive: bool, file: bool) -> None:
    """Forward the paths and the three flags unchanged to the artifact manager."""
    manager = self._artifact_manager
    manager.add_artifacts(*path, pyfile=pyfile, archive=archive, file=file)
1547
+
1548
def copy_from_local_to_fs(self, local_path: str, dest_path: str) -> None:
    """Forward both paths to the artifact manager's ``_add_forward_to_fs_artifacts``."""
    manager = self._artifact_manager
    manager._add_forward_to_fs_artifacts(local_path, dest_path)
1550
+
1551
def cache_artifact(self, blob: bytes) -> str:
    """Pass ``blob`` to the artifact manager's ``cache_artifact`` and return its result."""
    manager = self._artifact_manager
    return manager.cache_artifact(blob)
1553
+
1554
+
1555
class RetryState:
    """
    Bookkeeping shared between the attempts of one retried call.

    It remembers the most recent exception, counts how many exceptions were
    recorded, and holds a flag that is set once an attempt completes without
    raising; ``done()`` reports that flag.
    """

    def __init__(self) -> None:
        self._count = 0
        self._done = False
        self._exception: Optional[BaseException] = None

    def set_exception(self, exc: BaseException) -> None:
        """Record ``exc`` as the latest failure and bump the failure count."""
        self._count += 1
        self._exception = exc

    def throw(self) -> None:
        """Raise the recorded exception, or ``RuntimeError`` if there is none."""
        pending = self._exception
        if pending is not None:
            raise pending
        raise RuntimeError("No exception is set")

    def set_done(self) -> None:
        """Mark the call as finished."""
        self._done = True

    def count(self) -> int:
        """Return how many exceptions have been recorded."""
        return self._count

    def done(self) -> bool:
        """Return whether ``set_done`` has been called."""
        return self._done
1584
+
1585
+
1586
class AttemptManager:
    """
    Context manager wrapping a single attempt of a retried call.

    On exit it either records the raised exception in the shared retry state
    and suppresses it, lets it propagate, or marks the state as done when
    nothing was raised.
    """

    def __init__(self, check: Callable[..., bool], retry_state: RetryState) -> None:
        self._can_retry = check
        self._retry_state = retry_state

    def __enter__(self) -> None:
        return None

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> Optional[bool]:
        if not isinstance(exc_val, BaseException):
            # The block finished without raising.
            self._retry_state.set_done()
            return None

        retryable = self._can_retry(exc_val) or isinstance(exc_val, RetryException)
        if not retryable:
            # Bubble up the exception.
            return False

        # Swallow the exception and keep it for a later rethrow.
        self._retry_state.set_exception(exc_val)
        return True

    def is_first_try(self) -> bool:
        """Return ``True`` while no exception has been recorded in the retry state."""
        return self._retry_state._count == 0
1617
+
1618
+
1619
class Retrying:
    """
    Iterable that hands out one context manager per attempt, so that a code
    block can be retried when it raises. A predicate passed as ``can_retry``
    decides which exceptions are retried.

    The remaining parameters configure the exponential backoff between
    attempts; backoff values are in milliseconds.

    An example to use this class looks like this:

    .. code-block:: python

        retrying = Retrying(
            max_retries=3,
            initial_backoff=50,
            max_backoff=1000,
            backoff_multiplier=2.0,
            jitter=100,
            min_jitter_threshold=200,
            can_retry=lambda x: isinstance(x, TransientError),
        )
        for attempt in retrying:
            with attempt:
                # do the work.

    """

    def __init__(
        self,
        max_retries: int,
        initial_backoff: int,
        max_backoff: int,
        backoff_multiplier: float,
        jitter: int,
        min_jitter_threshold: int,
        can_retry: Callable[..., bool] = lambda x: True,
        sleep: Callable[[float], None] = time.sleep,
    ) -> None:
        self._max_retries = max_retries
        self._initial_backoff = initial_backoff
        self._max_backoff = max_backoff
        self._backoff_multiplier = backoff_multiplier
        self._jitter = jitter
        self._min_jitter_threshold = min_jitter_threshold
        self._can_retry = can_retry
        self._sleep = sleep

    def __iter__(self) -> Generator[AttemptManager, None, None]:
        """
        Generator function to wrap the exception producing code block.

        Returns
        -------
        A generator that yields the current attempt.

        Raises
        ------
        ValueError
            If ``max_retries`` is negative (raised when iteration starts).
        """
        state = RetryState()
        upcoming_wait: float = self._initial_backoff

        if self._max_retries < 0:
            raise ValueError("Can't have negative number of retries")

        while not (state.done() or state.count() > self._max_retries):
            if state.count() > 0:
                # Not the first attempt: wait, then grow the next wait up to the cap.
                backoff = upcoming_wait
                upcoming_wait = min(
                    self._max_backoff, upcoming_wait * self._backoff_multiplier
                )

                # Jitter is only added once the wait reaches the threshold.
                if backoff >= self._min_jitter_threshold:
                    backoff += random.uniform(0, self._jitter)

                logger.debug(f"Retrying call after {backoff} ms sleep")
                self._sleep(backoff / 1000.0)
            yield AttemptManager(self._can_retry, state)

        if not state.done():
            # Exceeded number of retries, throw last exception we had
            state.throw()