snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. See the registry's advisory page for more details.

Files changed (879)
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2088 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+ import os
18
+ import sys
19
+ import warnings
20
+ from collections.abc import Sized
21
+ from functools import reduce
22
+ from threading import RLock
23
+ from types import TracebackType
24
+ from typing import (
25
+ Any,
26
+ ClassVar,
27
+ Dict,
28
+ Iterable,
29
+ List,
30
+ Optional,
31
+ Tuple,
32
+ Type,
33
+ Union,
34
+ Set,
35
+ cast,
36
+ no_type_check,
37
+ overload,
38
+ TYPE_CHECKING,
39
+ )
40
+
41
+ from py4j.java_gateway import JavaObject
42
+
43
+ from pyspark import SparkConf, SparkContext
44
+ from pyspark.rdd import RDD
45
+ from pyspark.sql.column import _to_java_column
46
+ from pyspark.sql.conf import RuntimeConfig
47
+ from pyspark.sql.dataframe import DataFrame
48
+ from pyspark.sql.functions import lit
49
+ from pyspark.sql.pandas.conversion import SparkConversionMixin
50
+ from pyspark.sql.readwriter import DataFrameReader
51
+ from pyspark.sql.sql_formatter import SQLStringFormatter
52
+ from pyspark.sql.streaming import DataStreamReader
53
+ from pyspark.sql.types import (
54
+ AtomicType,
55
+ DataType,
56
+ StructField,
57
+ StructType,
58
+ _make_type_verifier,
59
+ _infer_schema,
60
+ _has_nulltype,
61
+ _merge_type,
62
+ _create_converter,
63
+ _parse_datatype_string,
64
+ _from_numpy_type,
65
+ )
66
+ from pyspark.errors.exceptions.captured import install_exception_handler
67
+ from pyspark.sql.utils import is_timestamp_ntz_preferred, to_str, try_remote_session_classmethod
68
+ from pyspark.errors import PySparkValueError, PySparkTypeError, PySparkRuntimeError
69
+
70
+ if TYPE_CHECKING:
71
+ from pyspark.sql._typing import AtomicValue, RowLike, OptionalPrimitiveType
72
+ from pyspark.sql.catalog import Catalog
73
+ from pyspark.sql.pandas._typing import ArrayLike, DataFrameLike as PandasDataFrameLike
74
+ from pyspark.sql.streaming import StreamingQueryManager
75
+ from pyspark.sql.udf import UDFRegistration
76
+ from pyspark.sql.udtf import UDTFRegistration
77
+
78
+ # Running MyPy type checks will always require pandas and
79
+ # other dependencies so importing here is fine.
80
+ from pyspark.sql.connect.client import SparkConnectClient
81
+
82
+
83
+ __all__ = ["SparkSession"]
84
+
85
+
86
def _monkey_patch_RDD(sparkSession: "SparkSession") -> None:
    """Install a ``toDF`` convenience method on :class:`RDD`.

    The patched method is a closure over *sparkSession*, so calling
    ``rdd.toDF(...)`` simply forwards to that session's
    :meth:`SparkSession.createDataFrame`.
    """

    @no_type_check
    def toDF(self, schema=None, sampleRatio=None):
        """
        Converts current :class:`RDD` into a :class:`DataFrame`

        This is a shorthand for ``spark.createDataFrame(rdd, schema, sampleRatio)``

        Parameters
        ----------
        schema : :class:`pyspark.sql.types.DataType`, str or list, optional
            a :class:`pyspark.sql.types.DataType` or a datatype string or a list of
            column names, default is None. The data type string format equals to
            :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can
            omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use
            ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`.
            We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`.
        sampleRatio : float, optional
            the sample ratio of rows used for inferring

        Returns
        -------
        :class:`DataFrame`

        Examples
        --------
        >>> rdd = spark.range(1).rdd.map(lambda x: tuple(x))
        >>> rdd.collect()
        [(0,)]
        >>> rdd.toDF().show()
        +---+
        | _1|
        +---+
        |  0|
        +---+
        """
        # Delegate to the captured session; schema inference and sampling are
        # handled entirely by createDataFrame.
        frame = sparkSession.createDataFrame(self, schema, sampleRatio)
        return frame

    RDD.toDF = toDF  # type: ignore[assignment]
125
+
126
+
127
+ # TODO(SPARK-38912): This method can be dropped once support for Python 3.8 is dropped
128
+ # In Python 3.9, the @property decorator has been made compatible with the
129
+ # @classmethod decorator (https://docs.python.org/3.9/library/functions.html#classmethod)
130
+ #
131
+ # @classmethod + @property is also affected by a bug in Python's docstring which was backported
132
+ # to Python 3.9.6 (https://github.com/python/cpython/pull/28838)
133
class classproperty(property):
    """Same as Python's @property decorator, but for class attributes.

    Looking the attribute up on the class (or on an instance) invokes the
    wrapped getter with the owning class as its sole argument, so every
    access produces a freshly computed value.

    Examples
    --------
    >>> class Builder:
    ...    def build(self):
    ...        return MyClass()
    ...
    >>> class MyClass:
    ...     @classproperty
    ...     def builder(cls):
    ...         print("instantiating new builder")
    ...         return Builder()
    ...
    >>> c1 = MyClass.builder
    instantiating new builder
    >>> c2 = MyClass.builder
    instantiating new builder
    >>> c1 == c2
    False
    >>> isinstance(c1.build(), MyClass)
    True
    """

    def __get__(self, instance: Any, owner: Any = None) -> "SparkSession.Builder":
        # Re-wrap the stored getter as a classmethod bound to the owner class
        # and invoke it immediately.  The "type: ignore" silences mypy, which
        # cannot see that ``fget`` is non-None at this point:
        #   error: Argument 1 to "classmethod" has incompatible
        #   type "Optional[Callable[[Any], Any]]";
        #   expected "Callable[..., Any]" [arg-type]
        bound_getter = classmethod(self.fget).__get__(None, owner)  # type: ignore
        return bound_getter()
164
+
165
+
166
+ class SparkSession(SparkConversionMixin):
167
+ """The entry point to programming Spark with the Dataset and DataFrame API.
168
+
169
+ A SparkSession can be used to create :class:`DataFrame`, register :class:`DataFrame` as
170
+ tables, execute SQL over tables, cache tables, and read parquet files.
171
+ To create a :class:`SparkSession`, use the following builder pattern:
172
+
173
+ .. versionchanged:: 3.4.0
174
+ Supports Spark Connect.
175
+
176
+ .. autoattribute:: builder
177
+ :annotation:
178
+
179
+ Examples
180
+ --------
181
+ Create a Spark session.
182
+
183
+ >>> spark = (
184
+ ... SparkSession.builder
185
+ ... .master("local")
186
+ ... .appName("Word Count")
187
+ ... .config("spark.some.config.option", "some-value")
188
+ ... .getOrCreate()
189
+ ... )
190
+
191
+ Create a Spark session with Spark Connect.
192
+
193
+ >>> spark = (
194
+ ... SparkSession.builder
195
+ ... .remote("sc://localhost")
196
+ ... .appName("Word Count")
197
+ ... .config("spark.some.config.option", "some-value")
198
+ ... .getOrCreate()
199
+ ... ) # doctest: +SKIP
200
+ """
201
+
202
+ class Builder:
203
+ """Builder for :class:`SparkSession`."""
204
+
205
+ _lock = RLock()
206
+
207
+ def __init__(self) -> None:
208
+ self._options: Dict[str, Any] = {}
209
+
210
+ @overload
211
+ def config(self, *, conf: SparkConf) -> "SparkSession.Builder":
212
+ ...
213
+
214
+ @overload
215
+ def config(self, key: str, value: Any) -> "SparkSession.Builder":
216
+ ...
217
+
218
+ @overload
219
+ def config(self, *, map: Dict[str, "OptionalPrimitiveType"]) -> "SparkSession.Builder":
220
+ ...
221
+
222
+ def config(
223
+ self,
224
+ key: Optional[str] = None,
225
+ value: Optional[Any] = None,
226
+ conf: Optional[SparkConf] = None,
227
+ *,
228
+ map: Optional[Dict[str, "OptionalPrimitiveType"]] = None,
229
+ ) -> "SparkSession.Builder":
230
+ """Sets a config option. Options set using this method are automatically propagated to
231
+ both :class:`SparkConf` and :class:`SparkSession`'s own configuration.
232
+
233
+ .. versionadded:: 2.0.0
234
+
235
+ .. versionchanged:: 3.4.0
236
+ Supports Spark Connect.
237
+
238
+ Parameters
239
+ ----------
240
+ key : str, optional
241
+ a key name string for configuration property
242
+ value : str, optional
243
+ a value for configuration property
244
+ conf : :class:`SparkConf`, optional
245
+ an instance of :class:`SparkConf`
246
+ map: dictionary, optional
247
+ a dictionary of configurations to set
248
+
249
+ .. versionadded:: 3.4.0
250
+
251
+ Returns
252
+ -------
253
+ :class:`SparkSession.Builder`
254
+
255
+ Examples
256
+ --------
257
+ For an existing class:`SparkConf`, use `conf` parameter.
258
+
259
+ >>> from pyspark.conf import SparkConf
260
+ >>> SparkSession.builder.config(conf=SparkConf())
261
+ <pyspark.sql.session.SparkSession.Builder...
262
+
263
+ For a (key, value) pair, you can omit parameter names.
264
+
265
+ >>> SparkSession.builder.config("spark.some.config.option", "some-value")
266
+ <pyspark.sql.session.SparkSession.Builder...
267
+
268
+ Additionally, you can pass a dictionary of configurations to set.
269
+
270
+ >>> SparkSession.builder.config(
271
+ ... map={"spark.some.config.number": 123, "spark.some.config.float": 0.123})
272
+ <pyspark.sql.session.SparkSession.Builder...
273
+ """
274
+ with self._lock:
275
+ if conf is not None:
276
+ for (k, v) in conf.getAll():
277
+ self._validate_startup_urls()
278
+ self._options[k] = v
279
+ elif map is not None:
280
+ for k, v in map.items(): # type: ignore[assignment]
281
+ v = to_str(v) # type: ignore[assignment]
282
+ self._validate_startup_urls()
283
+ self._options[k] = v
284
+ else:
285
+ value = to_str(value)
286
+ self._validate_startup_urls()
287
+ self._options[cast(str, key)] = value
288
+ return self
289
+
290
+ def _validate_startup_urls(
291
+ self,
292
+ ) -> None:
293
+ """
294
+ Helper function that validates the combination of startup URLs and raises an exception
295
+ if incompatible options are selected.
296
+ """
297
+ if "spark.master" in self._options and (
298
+ "spark.remote" in self._options or "SPARK_REMOTE" in os.environ
299
+ ):
300
+ raise RuntimeError(
301
+ "Spark master cannot be configured with Spark Connect server; "
302
+ "however, found URL for Spark Connect [%s]"
303
+ % self._options.get("spark.remote", os.environ.get("SPARK_REMOTE"))
304
+ )
305
+ if "spark.remote" in self._options and (
306
+ "spark.master" in self._options or "MASTER" in os.environ
307
+ ):
308
+ raise RuntimeError(
309
+ "Spark Connect server cannot be configured with Spark master; "
310
+ "however, found URL for Spark master [%s]"
311
+ % self._options.get("spark.master", os.environ.get("MASTER"))
312
+ )
313
+
314
+ if "spark.remote" in self._options:
315
+ remote = cast(str, self._options.get("spark.remote"))
316
+ if ("SPARK_REMOTE" in os.environ and os.environ["SPARK_REMOTE"] != remote) and (
317
+ "SPARK_LOCAL_REMOTE" in os.environ and not remote.startswith("local")
318
+ ):
319
+ raise RuntimeError(
320
+ "Only one Spark Connect client URL can be set; however, got a "
321
+ "different URL [%s] from the existing [%s]"
322
+ % (os.environ["SPARK_REMOTE"], remote)
323
+ )
324
+
325
+ def master(self, master: str) -> "SparkSession.Builder":
326
+ """Sets the Spark master URL to connect to, such as "local" to run locally, "local[4]"
327
+ to run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone
328
+ cluster.
329
+
330
+ .. versionadded:: 2.0.0
331
+
332
+ Parameters
333
+ ----------
334
+ master : str
335
+ a url for spark master
336
+
337
+ Returns
338
+ -------
339
+ :class:`SparkSession.Builder`
340
+
341
+ Examples
342
+ --------
343
+ >>> SparkSession.builder.master("local")
344
+ <pyspark.sql.session.SparkSession.Builder...
345
+ """
346
+ return self.config("spark.master", master)
347
+
348
+ def remote(self, url: str) -> "SparkSession.Builder":
349
+ """Sets the Spark remote URL to connect to, such as "sc://host:port" to run
350
+ it via Spark Connect server.
351
+
352
+ .. versionadded:: 3.4.0
353
+
354
+ Parameters
355
+ ----------
356
+ url : str
357
+ URL to Spark Connect server
358
+
359
+ Returns
360
+ -------
361
+ :class:`SparkSession.Builder`
362
+
363
+ Examples
364
+ --------
365
+ >>> SparkSession.builder.remote("sc://localhost") # doctest: +SKIP
366
+ <pyspark.sql.session.SparkSession.Builder...
367
+ """
368
+ return self.config("spark.remote", url)
369
+
370
+ def appName(self, name: str) -> "SparkSession.Builder":
371
+ """Sets a name for the application, which will be shown in the Spark web UI.
372
+
373
+ If no application name is set, a randomly generated name will be used.
374
+
375
+ .. versionadded:: 2.0.0
376
+
377
+ .. versionchanged:: 3.4.0
378
+ Supports Spark Connect.
379
+
380
+ Parameters
381
+ ----------
382
+ name : str
383
+ an application name
384
+
385
+ Returns
386
+ -------
387
+ :class:`SparkSession.Builder`
388
+
389
+ Examples
390
+ --------
391
+ >>> SparkSession.builder.appName("My app")
392
+ <pyspark.sql.session.SparkSession.Builder...
393
+ """
394
+ return self.config("spark.app.name", name)
395
+
396
    def enableHiveSupport(self) -> "SparkSession.Builder":
        """Enables Hive support, including connectivity to a persistent Hive metastore, support
        for Hive SerDes, and Hive user-defined functions.

        .. versionadded:: 2.0.0

        Returns
        -------
        :class:`SparkSession.Builder`

        Examples
        --------
        >>> SparkSession.builder.enableHiveSupport()
        <pyspark.sql.session.SparkSession.Builder...
        """
        # Selecting the "hive" catalog implementation is what actually turns
        # Hive support on when the session is created.
        return self.config("spark.sql.catalogImplementation", "hive")
412
+
413
    def getOrCreate(self) -> "SparkSession":
        """Gets an existing :class:`SparkSession` or, if there is no existing one, creates a
        new one based on the options set in this builder.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Returns
        -------
        :class:`SparkSession`

        Examples
        --------
        This method first checks whether there is a valid global default SparkSession, and if
        yes, return that one. If no valid global default SparkSession exists, the method
        creates a new SparkSession and assigns the newly created SparkSession as the global
        default.

        >>> s1 = SparkSession.builder.config("k1", "v1").getOrCreate()
        >>> s1.conf.get("k1") == "v1"
        True

        The configuration of the SparkSession can be changed afterwards

        >>> s1.conf.set("k1", "v1_new")
        >>> s1.conf.get("k1") == "v1_new"
        True

        In case an existing SparkSession is returned, the config options specified
        in this builder will be applied to the existing SparkSession.

        >>> s2 = SparkSession.builder.config("k2", "v2").getOrCreate()
        >>> s1.conf.get("k1") == s2.conf.get("k1") == "v1_new"
        True
        >>> s1.conf.get("k2") == s2.conf.get("k2") == "v2"
        True
        """
        from pyspark.context import SparkContext
        from pyspark.conf import SparkConf

        opts = dict(self._options)

        with self._lock:
            # Spark Connect path: triggered by env vars or the "spark.remote"
            # option. Delegates to the remote (Connect) session builder.
            if (
                "SPARK_CONNECT_MODE_ENABLED" in os.environ
                or "SPARK_REMOTE" in os.environ
                or "spark.remote" in opts
            ):
                with SparkContext._lock:
                    from pyspark.sql.connect.session import SparkSession as RemoteSparkSession

                    if (
                        SparkContext._active_spark_context is None
                        and SparkSession._instantiatedSession is None
                    ):
                        url = opts.get("spark.remote", os.environ.get("SPARK_REMOTE"))
                        # NOTE(review): if only SPARK_CONNECT_MODE_ENABLED is set
                        # (no SPARK_REMOTE / "spark.remote"), `url` is None and the
                        # startswith() below raises AttributeError — confirm intended.

                        if url.startswith("local"):
                            # "local" remote: spin up an in-process Connect server
                            # and redirect the URL to it.
                            os.environ["SPARK_LOCAL_REMOTE"] = "1"
                            RemoteSparkSession._start_connect_server(url, opts)
                            url = "sc://localhost"

                        os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"
                        opts["spark.remote"] = url
                        return RemoteSparkSession.builder.config(map=opts).getOrCreate()
                    elif "SPARK_LOCAL_REMOTE" in os.environ:
                        # A local Connect server was already started earlier in
                        # this process; just reconnect to it.
                        url = "sc://localhost"
                        os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"
                        opts["spark.remote"] = url
                        return RemoteSparkSession.builder.config(map=opts).getOrCreate()
                    else:
                        raise RuntimeError(
                            "Cannot start a remote Spark session because there "
                            "is a regular Spark session already running."
                        )

            # Classic (JVM-backed) path: reuse the process-wide session if its
            # underlying SparkContext is still alive, otherwise build a new one.
            session = SparkSession._instantiatedSession
            if session is None or session._sc._jsc is None:
                sparkConf = SparkConf()
                for key, value in self._options.items():
                    sparkConf.set(key, value)
                # This SparkContext may be an existing one.
                sc = SparkContext.getOrCreate(sparkConf)
                # Do not update `SparkConf` for existing `SparkContext`, as it's shared
                # by all sessions.
                session = SparkSession(sc, options=self._options)
            else:
                # Apply only the runtime-modifiable settings to the live session.
                getattr(
                    getattr(session._jvm, "SparkSession$"), "MODULE$"
                ).applyModifiableSettings(session._jsparkSession, self._options)
            return session
506
+
507
+ # Spark Connect-specific API
508
    # Spark Connect-specific API
    def create(self) -> "SparkSession":
        """Creates a new SparkSession. Can only be used in the context of Spark Connect
        and will throw an exception otherwise.

        .. versionadded:: 3.5.0

        Returns
        -------
        :class:`SparkSession`

        Notes
        -----
        This method will update the default and/or active session if they are not set.
        """
        opts = dict(self._options)
        if "SPARK_REMOTE" in os.environ or "spark.remote" in opts:
            from pyspark.sql.connect.session import SparkSession as RemoteSparkSession

            # Validate that no incompatible configuration options are selected.
            self._validate_startup_urls()

            url = opts.get("spark.remote", os.environ.get("SPARK_REMOTE"))
            if url.startswith("local"):
                # Unlike getOrCreate(), create() never starts a local Connect
                # server: always-new sessions require a real remote endpoint.
                raise RuntimeError(
                    "Creating new SparkSessions with `local` "
                    "connection string is not supported."
                )

            # Mark this Spark Session as Spark Connect. This prevents that local PySpark is
            # used in conjunction with Spark Connect mode.
            os.environ["SPARK_CONNECT_MODE_ENABLED"] = "1"
            opts["spark.remote"] = url
            return RemoteSparkSession.builder.config(map=opts).create()
        else:
            raise RuntimeError(
                "SparkSession.builder.create() can only be used with Spark Connect; "
                "however, spark.remote was not found."
            )
546
+
547
+ # TODO(SPARK-38912): Replace @classproperty with @classmethod + @property once support for
548
+ # Python 3.8 is dropped.
549
+ #
550
+ # In Python 3.9, the @property decorator has been made compatible with the
551
+ # @classmethod decorator (https://docs.python.org/3.9/library/functions.html#classmethod)
552
+ #
553
+ # @classmethod + @property is also affected by a bug in Python's docstring which was backported
554
+ # to Python 3.9.6 (https://github.com/python/cpython/pull/28838)
555
    @classproperty
    def builder(cls) -> Builder:
        """Creates a :class:`Builder` for constructing a :class:`SparkSession`.

        .. versionchanged:: 3.4.0
            Supports Spark Connect.
        """
        # `classproperty` makes `SparkSession.builder` accessible on the class
        # itself (no instance); each access returns a fresh Builder.
        return cls.Builder()
563
+
564
    # Process-wide singleton bookkeeping:
    # _instantiatedSession — the first JVM-backed session created in this process;
    # _activeSession — the session most recently marked active (see getActiveSession).
    _instantiatedSession: ClassVar[Optional["SparkSession"]] = None
    _activeSession: ClassVar[Optional["SparkSession"]] = None
566
+
567
    def __init__(
        self,
        sparkContext: SparkContext,
        # NOTE(review): mutable default argument; harmless here only because
        # `options` is never mutated locally — confirm before relying on it.
        options: Dict[str, Any] = {},
    ):
        """Create a JVM-backed session over ``sparkContext``.

        Reuses the JVM's default session when one exists and its context is
        still alive; otherwise constructs a fresh JVM SparkSession. Also
        installs Python-side hooks and registers this object as the process
        default/active session when none is set.
        """
        self._sc = sparkContext
        self._jsc = self._sc._jsc
        self._jvm = self._sc._jvm

        assert self._jvm is not None

        if jsparkSession is None:
            if (
                self._jvm.SparkSession.getDefaultSession().isDefined()
                and not self._jvm.SparkSession.getDefaultSession().get().sparkContext().isStopped()
            ):
                # Reuse the live JVM default session, applying modifiable options.
                jsparkSession = self._jvm.SparkSession.getDefaultSession().get()
                getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").applyModifiableSettings(
                    jsparkSession, options
                )
            else:
                jsparkSession = self._jvm.SparkSession(self._jsc.sc(), options)
        else:
            getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").applyModifiableSettings(
                jsparkSession, options
            )
        self._jsparkSession = jsparkSession
        _monkey_patch_RDD(self)
        install_exception_handler()
        # If we had an instantiated SparkSession attached with a SparkContext
        # which is stopped now, we need to renew the instantiated SparkSession.
        # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate.
        if (
            SparkSession._instantiatedSession is None
            or SparkSession._instantiatedSession._sc._jsc is None
        ):
            SparkSession._instantiatedSession = self
            SparkSession._activeSession = self
            assert self._jvm is not None
            self._jvm.SparkSession.setDefaultSession(self._jsparkSession)
            self._jvm.SparkSession.setActiveSession(self._jsparkSession)
609
+
610
    def _repr_html_(self) -> str:
        # Rendered by Jupyter/IPython when the session object is displayed:
        # shows the catalog implementation plus the SparkContext's own HTML.
        return """
            <div>
                <p><b>SparkSession - {catalogImplementation}</b></p>
                {sc_HTML}
            </div>
        """.format(
            catalogImplementation=self.conf.get("spark.sql.catalogImplementation"),
            sc_HTML=self.sparkContext._repr_html_(),
        )
620
+
621
    @property
    def _jconf(self) -> "JavaObject":
        """Accessor for the JVM SQL-specific configurations"""
        # Py4J handle to the JVM session's SQLConf (not the Python RuntimeConfig).
        return self._jsparkSession.sessionState().conf()
625
+
626
    def newSession(self) -> "SparkSession":
        """
        Returns a new :class:`SparkSession` as new session, that has separate SQLConf,
        registered temporary views and UDFs, but shared :class:`SparkContext` and
        table cache.

        .. versionadded:: 2.0.0

        Returns
        -------
        :class:`SparkSession`
            Spark session if an active session exists for the current thread

        Examples
        --------
        >>> spark.newSession()
        <...SparkSession object ...>
        """
        # Same SparkContext, fresh JVM session state; `self.__class__` keeps
        # subclass behavior for derived session types.
        return self.__class__(self._sc, self._jsparkSession.newSession())
645
+
646
    @classmethod
    @try_remote_session_classmethod
    def getActiveSession(cls) -> Optional["SparkSession"]:
        """
        Returns the active :class:`SparkSession` for the current thread, returned by the builder

        .. versionadded:: 3.0.0

        .. versionchanged:: 3.5.0
            Supports Spark Connect.

        Returns
        -------
        :class:`SparkSession`
            Spark session if an active session exists for the current thread

        Examples
        --------
        >>> s = SparkSession.getActiveSession()
        >>> df = s.createDataFrame([('Alice', 1)], ['name', 'age'])
        >>> df.select("age").show()
        +---+
        |age|
        +---+
        |  1|
        +---+
        """
        from pyspark import SparkContext

        sc = SparkContext._active_spark_context
        if sc is None:
            return None
        else:
            assert sc._jvm is not None
            if sc._jvm.SparkSession.getActiveSession().isDefined():
                # Intentionally discarded constructor call: SparkSession.__init__
                # refreshes the Python-side _activeSession bookkeeping from the
                # JVM's active session as a side effect.
                SparkSession(sc, sc._jvm.SparkSession.getActiveSession().get())
                return SparkSession._activeSession
            else:
                return None
685
+
686
+ @classmethod
687
+ @try_remote_session_classmethod
688
+ def active(cls) -> "SparkSession":
689
+ """
690
+ Returns the active or default :class:`SparkSession` for the current thread, returned by
691
+ the builder.
692
+
693
+ .. versionadded:: 3.5.0
694
+
695
+ Returns
696
+ -------
697
+ :class:`SparkSession`
698
+ Spark session if an active or default session exists for the current thread.
699
+ """
700
+ session = cls.getActiveSession()
701
+ if session is None:
702
+ session = cls._instantiatedSession
703
+ if session is None:
704
+ raise PySparkRuntimeError(
705
+ error_class="NO_ACTIVE_OR_DEFAULT_SESSION",
706
+ message_parameters={},
707
+ )
708
+ return session
709
+
710
    @property
    def sparkContext(self) -> SparkContext:
        """
        Returns the underlying :class:`SparkContext`.

        .. versionadded:: 2.0.0

        Returns
        -------
        :class:`SparkContext`

        Examples
        --------
        >>> spark.sparkContext
        <SparkContext master=... appName=...>

        Create an RDD from the Spark context

        >>> rdd = spark.sparkContext.parallelize([1, 2, 3])
        >>> rdd.collect()
        [1, 2, 3]
        """
        # Direct accessor; the context is fixed at session construction time.
        return self._sc
733
+
734
    @property
    def version(self) -> str:
        """
        The version of Spark on which this application is running.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Returns
        -------
        str
            the version of Spark in string.

        Examples
        --------
        >>> _ = spark.version
        """
        # Delegates to the JVM session; returns e.g. "3.5.0".
        return self._jsparkSession.version()
754
+
755
    @property
    def conf(self) -> RuntimeConfig:
        """Runtime configuration interface for Spark.

        This is the interface through which the user can get and set all Spark and Hadoop
        configurations that are relevant to Spark SQL. When getting the value of a config,
        this defaults to the value set in the underlying :class:`SparkContext`, if any.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Returns
        -------
        :class:`pyspark.sql.conf.RuntimeConfig`

        Examples
        --------
        >>> spark.conf
        <pyspark...RuntimeConf...>

        Set a runtime configuration for the session

        >>> spark.conf.set("key", "value")
        >>> spark.conf.get("key")
        'value'
        """
        # Lazily created and cached on first access.
        if not hasattr(self, "_conf"):
            self._conf = RuntimeConfig(self._jsparkSession.conf())
        return self._conf
786
+
787
    @property
    def catalog(self) -> "Catalog":
        """Interface through which the user may create, drop, alter or query underlying
        databases, tables, functions, etc.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Returns
        -------
        :class:`Catalog`

        Examples
        --------
        >>> spark.catalog
        <...Catalog object ...>

        Create a temp view, show the list, and drop it.

        >>> spark.range(1).createTempView("test_view")
        >>> spark.catalog.listTables()
        [Table(name='test_view', catalog=None, namespace=[], description=None, ...
        >>> _ = spark.catalog.dropTempView("test_view")
        """
        from pyspark.sql.catalog import Catalog

        # Lazily created and cached on first access (mirrors `conf`).
        if not hasattr(self, "_catalog"):
            self._catalog = Catalog(self)
        return self._catalog
818
+
819
    @property
    def udf(self) -> "UDFRegistration":
        """Returns a :class:`UDFRegistration` for UDF registration.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Returns
        -------
        :class:`UDFRegistration`

        Examples
        --------
        Register a Python UDF, and use it in SQL.

        >>> strlen = spark.udf.register("strlen", lambda x: len(x))
        >>> spark.sql("SELECT strlen('test')").show()
        +------------+
        |strlen(test)|
        +------------+
        |           4|
        +------------+
        """
        from pyspark.sql.udf import UDFRegistration

        # Unlike `conf`/`catalog`, this is not cached: a new registration
        # facade is returned on every access (registrations still persist
        # in the session itself).
        return UDFRegistration(self)
847
+
848
    @property
    def udtf(self) -> "UDTFRegistration":
        """Returns a :class:`UDTFRegistration` for UDTF registration.

        .. versionadded:: 3.5.0

        Returns
        -------
        :class:`UDTFRegistration`

        Notes
        -----
        Supports Spark Connect.
        """
        from pyspark.sql.udtf import UDTFRegistration

        # Not cached; parallels the `udf` property.
        return UDTFRegistration(self)
865
+
866
    def range(
        self,
        start: int,
        end: Optional[int] = None,
        step: int = 1,
        numPartitions: Optional[int] = None,
    ) -> DataFrame:
        """
        Create a :class:`DataFrame` with single :class:`pyspark.sql.types.LongType` column named
        ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with
        step value ``step``.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        start : int
            the start value
        end : int, optional
            the end value (exclusive)
        step : int, optional
            the incremental step (default: 1)
        numPartitions : int, optional
            the number of partitions of the DataFrame

        Returns
        -------
        :class:`DataFrame`

        Examples
        --------
        >>> spark.range(1, 7, 2).show()
        +---+
        | id|
        +---+
        |  1|
        |  3|
        |  5|
        +---+

        If only one argument is specified, it will be used as the end value.

        >>> spark.range(3).show()
        +---+
        | id|
        +---+
        |  0|
        |  1|
        |  2|
        +---+
        """
        if numPartitions is None:
            numPartitions = self._sc.defaultParallelism

        # Single-argument form: `start` acts as the exclusive end, like builtin range().
        if end is None:
            jdf = self._jsparkSession.range(0, int(start), int(step), int(numPartitions))
        else:
            jdf = self._jsparkSession.range(int(start), int(end), int(step), int(numPartitions))

        return DataFrame(jdf, self)
929
+
930
    def _inferSchemaFromList(
        self, data: Iterable[Any], names: Optional[List[str]] = None
    ) -> StructType:
        """
        Infer schema from list of Row, dict, or tuple.

        Parameters
        ----------
        data : iterable
            list of Row, dict, or tuple
        names : list, optional
            list of column names

        Returns
        -------
        :class:`pyspark.sql.types.StructType`

        Raises
        ------
        PySparkValueError
            If ``data`` is empty, or if some column's type cannot be determined
            (e.g. the column is null in every row).
        """
        if not data:
            raise PySparkValueError(
                error_class="CANNOT_INFER_EMPTY_SCHEMA",
                message_parameters={},
            )
        infer_dict_as_struct = self._jconf.inferDictAsStruct()
        infer_array_from_first_element = self._jconf.legacyInferArrayTypeFromFirstElement()
        prefer_timestamp_ntz = is_timestamp_ntz_preferred()
        # Infer a per-row schema and merge them pairwise into one unified schema.
        schema = reduce(
            _merge_type,
            (
                _infer_schema(
                    row,
                    names,
                    infer_dict_as_struct=infer_dict_as_struct,
                    infer_array_from_first_element=infer_array_from_first_element,
                    prefer_timestamp_ntz=prefer_timestamp_ntz,
                )
                for row in data
            ),
        )
        # A remaining NullType means no row provided a concrete type for a field.
        if _has_nulltype(schema):
            raise PySparkValueError(
                error_class="CANNOT_DETERMINE_TYPE",
                message_parameters={},
            )
        return schema
974
+
975
    def _inferSchema(
        self,
        rdd: RDD[Any],
        samplingRatio: Optional[float] = None,
        names: Optional[List[str]] = None,
    ) -> StructType:
        """
        Infer schema from an RDD of Row, dict, or tuple.

        Parameters
        ----------
        rdd : :class:`RDD`
            an RDD of Row, dict, or tuple
        samplingRatio : float, optional
            sampling ratio, or no sampling (default)
        names : list, optional
            list of column names to use for the inferred fields

        Returns
        -------
        :class:`pyspark.sql.types.StructType`
        """
        first = rdd.first()
        if isinstance(first, Sized) and len(first) == 0:
            raise PySparkValueError(
                error_class="CANNOT_INFER_EMPTY_SCHEMA",
                message_parameters={},
            )

        infer_dict_as_struct = self._jconf.inferDictAsStruct()
        infer_array_from_first_element = self._jconf.legacyInferArrayTypeFromFirstElement()
        prefer_timestamp_ntz = is_timestamp_ntz_preferred()
        if samplingRatio is None:
            # No sampling: infer from the first row, then scan up to 100 rows
            # only if some field is still untyped (NullType).
            # NOTE(review): this first call omits infer_array_from_first_element,
            # unlike every other _infer_schema call here — confirm against upstream.
            schema = _infer_schema(
                first,
                names=names,
                infer_dict_as_struct=infer_dict_as_struct,
                prefer_timestamp_ntz=prefer_timestamp_ntz,
            )
            if _has_nulltype(schema):
                for row in rdd.take(100)[1:]:
                    schema = _merge_type(
                        schema,
                        _infer_schema(
                            row,
                            names=names,
                            infer_dict_as_struct=infer_dict_as_struct,
                            infer_array_from_first_element=infer_array_from_first_element,
                            prefer_timestamp_ntz=prefer_timestamp_ntz,
                        ),
                    )
                    if not _has_nulltype(schema):
                        break
                else:
                    # Exhausted the sample without resolving every field's type.
                    raise PySparkValueError(
                        error_class="CANNOT_DETERMINE_TYPE",
                        message_parameters={},
                    )
        else:
            # Sampling path: infer on a (possibly sampled) pass over the whole RDD.
            if samplingRatio < 0.99:
                rdd = rdd.sample(False, float(samplingRatio))
            schema = rdd.map(
                lambda row: _infer_schema(
                    row,
                    names,
                    infer_dict_as_struct=infer_dict_as_struct,
                    infer_array_from_first_element=infer_array_from_first_element,
                    prefer_timestamp_ntz=prefer_timestamp_ntz,
                )
            ).reduce(_merge_type)
        return schema
1045
+
1046
    def _createFromRDD(
        self,
        rdd: RDD[Any],
        schema: Optional[Union[DataType, List[str]]],
        samplingRatio: Optional[float],
    ) -> Tuple[RDD[Tuple], StructType]:
        """
        Create an RDD for DataFrame from an existing RDD, returns the RDD and schema.
        """
        if schema is None or isinstance(schema, (list, tuple)):
            # Infer the struct, then convert each record to a tuple matching it.
            struct = self._inferSchema(rdd, samplingRatio, names=schema)
            converter = _create_converter(struct)
            tupled_rdd = rdd.map(converter)
            if isinstance(schema, (list, tuple)):
                # Overwrite inferred field names with the caller-supplied ones.
                for i, name in enumerate(schema):
                    struct.fields[i].name = name
                    struct.names[i] = name

        elif isinstance(schema, StructType):
            # Explicit schema: records are assumed to already be tuple-shaped.
            struct = schema
            tupled_rdd = rdd

        else:
            raise PySparkTypeError(
                error_class="NOT_LIST_OR_NONE_OR_STRUCT",
                message_parameters={
                    "arg_name": "schema",
                    "arg_type": type(schema).__name__,
                },
            )

        # convert python objects to sql data
        internal_rdd = tupled_rdd.map(struct.toInternal)
        return internal_rdd, struct
1080
+
1081
+ def _createFromLocal(
1082
+ self, data: Iterable[Any], schema: Optional[Union[DataType, List[str]]]
1083
+ ) -> Tuple[RDD[Tuple], StructType]:
1084
+ """
1085
+ Create an RDD for DataFrame from a list or pandas.DataFrame, returns
1086
+ the RDD and schema.
1087
+ """
1088
+ # make sure data could consumed multiple times
1089
+ if not isinstance(data, list):
1090
+ data = list(data)
1091
+
1092
+ if schema is None or isinstance(schema, (list, tuple)):
1093
+ struct = self._inferSchemaFromList(data, names=schema)
1094
+ converter = _create_converter(struct)
1095
+ tupled_data: Iterable[Tuple] = map(converter, data)
1096
+ if isinstance(schema, (list, tuple)):
1097
+ for i, name in enumerate(schema):
1098
+ struct.fields[i].name = name
1099
+ struct.names[i] = name
1100
+
1101
+ elif isinstance(schema, StructType):
1102
+ struct = schema
1103
+ tupled_data = data
1104
+
1105
+ else:
1106
+ raise PySparkTypeError(
1107
+ error_class="NOT_LIST_OR_NONE_OR_STRUCT",
1108
+ message_parameters={
1109
+ "arg_name": "schema",
1110
+ "arg_type": type(schema).__name__,
1111
+ },
1112
+ )
1113
+
1114
+ # convert python objects to sql data
1115
+ internal_data = [struct.toInternal(row) for row in tupled_data]
1116
+ return self._sc.parallelize(internal_data), struct
1117
+
1118
    @staticmethod
    def _create_shell_session() -> "SparkSession":
        """
        Initialize a :class:`SparkSession` for a pyspark shell session. This is called from
        shell.py to make error handling simpler without needing to declare local variables in
        that script, which would expose those to users.
        """
        import py4j
        from pyspark.conf import SparkConf
        from pyspark.context import SparkContext

        try:
            # Try to access HiveConf, it will raise exception if Hive is not added
            conf = SparkConf()
            assert SparkContext._jvm is not None
            if conf.get("spark.sql.catalogImplementation", "hive").lower() == "hive":
                SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
                return SparkSession.builder.enableHiveSupport().getOrCreate()
            else:
                return SparkSession._getActiveSessionOrCreate()
        except (py4j.protocol.Py4JError, TypeError):
            # Hive classes unavailable: warn only if Hive was explicitly requested,
            # then fall back to a plain (non-Hive) session.
            if conf.get("spark.sql.catalogImplementation", "").lower() == "hive":
                warnings.warn(
                    "Fall back to non-hive support because failing to access HiveConf, "
                    "please make sure you build spark with hive"
                )

            return SparkSession._getActiveSessionOrCreate()
1146
+
1147
+ @staticmethod
1148
+ def _getActiveSessionOrCreate(**static_conf: Any) -> "SparkSession":
1149
+ """
1150
+ Returns the active :class:`SparkSession` for the current thread, returned by the builder,
1151
+ or if there is no existing one, creates a new one based on the options set in the builder.
1152
+
1153
+ NOTE that 'static_conf' might not be set if there's an active or default Spark session
1154
+ running.
1155
+ """
1156
+ spark = SparkSession.getActiveSession()
1157
+ if spark is None:
1158
+ builder = SparkSession.builder
1159
+ for k, v in static_conf.items():
1160
+ builder = builder.config(k, v)
1161
+ spark = builder.getOrCreate()
1162
+ return spark
1163
+
1164
    # Typing-only overloads for createDataFrame: they pin down the valid
    # (data, schema) combinations for static checkers and are never executed;
    # the real implementation follows below.
    @overload
    def createDataFrame(
        self,
        data: Iterable["RowLike"],
        schema: Union[List[str], Tuple[str, ...]] = ...,
        samplingRatio: Optional[float] = ...,
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self,
        data: "RDD[RowLike]",
        schema: Union[List[str], Tuple[str, ...]] = ...,
        samplingRatio: Optional[float] = ...,
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self,
        data: Iterable["RowLike"],
        schema: Union[StructType, str],
        *,
        verifySchema: bool = ...,
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self,
        data: "RDD[RowLike]",
        schema: Union[StructType, str],
        *,
        verifySchema: bool = ...,
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self,
        data: "RDD[AtomicValue]",
        schema: Union[AtomicType, str],
        verifySchema: bool = ...,
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self,
        data: Iterable["AtomicValue"],
        schema: Union[AtomicType, str],
        verifySchema: bool = ...,
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self, data: "PandasDataFrameLike", samplingRatio: Optional[float] = ...
    ) -> DataFrame:
        ...

    @overload
    def createDataFrame(
        self,
        data: "PandasDataFrameLike",
        schema: Union[StructType, str],
        verifySchema: bool = ...,
    ) -> DataFrame:
        ...
1234
+
1235
    def createDataFrame(  # type: ignore[misc]
        self,
        data: Union[RDD[Any], Iterable[Any], "PandasDataFrameLike", "ArrayLike"],
        schema: Optional[Union[AtomicType, StructType, str]] = None,
        samplingRatio: Optional[float] = None,
        verifySchema: bool = True,
    ) -> DataFrame:
        """
        Creates a :class:`DataFrame` from an :class:`RDD`, a list, a :class:`pandas.DataFrame`
        or a :class:`numpy.ndarray`.

        .. versionadded:: 2.0.0

        .. versionchanged:: 3.4.0
            Supports Spark Connect.

        Parameters
        ----------
        data : :class:`RDD` or iterable
            an RDD of any kind of SQL data representation (:class:`Row`,
            :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`,
            :class:`pandas.DataFrame` or :class:`numpy.ndarray`.
        schema : :class:`pyspark.sql.types.DataType`, str or list, optional
            a :class:`pyspark.sql.types.DataType` or a datatype string or a list of
            column names, default is None. The data type string format equals to
            :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can
            omit the ``struct<>``.

            When ``schema`` is a list of column names, the type of each column
            will be inferred from ``data``.

            When ``schema`` is ``None``, it will try to infer the schema (column names and types)
            from ``data``, which should be an RDD of either :class:`Row`,
            :class:`namedtuple`, or :class:`dict`.

            When ``schema`` is :class:`pyspark.sql.types.DataType` or a datatype string, it must
            match the real data, or an exception will be thrown at runtime. If the given schema is
            not :class:`pyspark.sql.types.StructType`, it will be wrapped into a
            :class:`pyspark.sql.types.StructType` as its only field, and the field name will be
            "value". Each record will also be wrapped into a tuple, which can be converted to row
            later.
        samplingRatio : float, optional
            the sample ratio of rows used for inferring. The first few rows will be used
            if ``samplingRatio`` is ``None``.
        verifySchema : bool, optional
            verify data types of every row against schema. Enabled by default.

            .. versionadded:: 2.1.0

        Returns
        -------
        :class:`DataFrame`

        Notes
        -----
        Usage with `spark.sql.execution.arrow.pyspark.enabled=True` is experimental.

        Examples
        --------
        Create a DataFrame from a list of tuples.

        >>> spark.createDataFrame([('Alice', 1)]).show()
        +-----+---+
        |   _1| _2|
        +-----+---+
        |Alice|  1|
        +-----+---+

        Create a DataFrame from a list of dictionaries.

        >>> d = [{'name': 'Alice', 'age': 1}]
        >>> spark.createDataFrame(d).show()
        +---+-----+
        |age| name|
        +---+-----+
        |  1|Alice|
        +---+-----+

        Create a DataFrame with column names specified.

        >>> spark.createDataFrame([('Alice', 1)], ['name', 'age']).show()
        +-----+---+
        | name|age|
        +-----+---+
        |Alice|  1|
        +-----+---+

        Create a DataFrame with the explicit schema specified.

        >>> from pyspark.sql.types import *
        >>> schema = StructType([
        ...    StructField("name", StringType(), True),
        ...    StructField("age", IntegerType(), True)])
        >>> spark.createDataFrame([('Alice', 1)], schema).show()
        +-----+---+
        | name|age|
        +-----+---+
        |Alice|  1|
        +-----+---+

        Create a DataFrame with the schema in DDL formatted string.

        >>> spark.createDataFrame([('Alice', 1)], "name: string, age: int").show()
        +-----+---+
        | name|age|
        +-----+---+
        |Alice|  1|
        +-----+---+

        Create an empty DataFrame.
        When initializing an empty DataFrame in PySpark, it's mandatory to specify its schema,
        as the DataFrame lacks data from which the schema can be inferred.

        >>> spark.createDataFrame([], "name: string, age: int").show()
        +----+---+
        |name|age|
        +----+---+
        +----+---+

        Create a DataFrame from Row objects.

        >>> from pyspark.sql import Row
        >>> Person = Row('name', 'age')
        >>> df = spark.createDataFrame([Person("Alice", 1)])
        >>> df.show()
        +-----+---+
        | name|age|
        +-----+---+
        |Alice|  1|
        +-----+---+

        Create a DataFrame from a pandas DataFrame.

        >>> spark.createDataFrame(df.toPandas()).show()  # doctest: +SKIP
        +-----+---+
        | name|age|
        +-----+---+
        |Alice|  1|
        +-----+---+
        >>> spark.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  # doctest: +SKIP
        +---+---+
        |  0|  1|
        +---+---+
        |  1|  2|
        +---+---+
        """
        # Creating a DataFrame also marks this session as the active one.
        SparkSession._activeSession = self
        assert self._jvm is not None
        self._jvm.SparkSession.setActiveSession(self._jsparkSession)
        if isinstance(data, DataFrame):
            raise PySparkTypeError(
                error_class="SHOULD_NOT_DATAFRAME",
                message_parameters={"arg_name": "data"},
            )

        if isinstance(schema, str):
            # DDL string -> DataType.
            schema = cast(Union[AtomicType, StructType, str], _parse_datatype_string(schema))
        elif isinstance(schema, (list, tuple)):
            # Must re-encode any unicode strings to be consistent with StructField names
            schema = [x.encode("utf-8") if not isinstance(x, str) else x for x in schema]

        # pandas/numpy are optional dependencies; probe for them lazily.
        try:
            import pandas as pd

            has_pandas = True
        except Exception:
            has_pandas = False

        try:
            import numpy as np

            has_numpy = True
        except Exception:
            has_numpy = False

        if has_numpy and isinstance(data, np.ndarray):
            # `data` of numpy.ndarray type will be converted to a pandas DataFrame,
            # so pandas is required.
            from pyspark.sql.pandas.utils import require_minimum_pandas_version

            require_minimum_pandas_version()
            if data.ndim not in [1, 2]:
                raise PySparkValueError(
                    error_class="INVALID_NDARRAY_DIMENSION",
                    message_parameters={"dimensions": "1 or 2"},
                )

            # 1-D (or single-column 2-D) arrays become a single "value" column;
            # wider arrays get positional names _1, _2, ...
            if data.ndim == 1 or data.shape[1] == 1:
                column_names = ["value"]
            else:
                column_names = ["_%s" % i for i in range(1, data.shape[1] + 1)]

            if schema is None and not self._jconf.arrowPySparkEnabled():
                # Construct `schema` from `np.dtype` of the input NumPy array
                # TODO: Apply the logic below when self._jconf.arrowPySparkEnabled() is True
                spark_type = _from_numpy_type(data.dtype)
                if spark_type is not None:
                    schema = StructType(
                        [StructField(name, spark_type, nullable=True) for name in column_names]
                    )

            data = pd.DataFrame(data, columns=column_names)

        if has_pandas and isinstance(data, pd.DataFrame):
            # Create a DataFrame from pandas DataFrame.
            return super(SparkSession, self).createDataFrame(  # type: ignore[call-overload]
                data, schema, samplingRatio, verifySchema
            )
        return self._create_dataframe(
            data, schema, samplingRatio, verifySchema  # type: ignore[arg-type]
        )
1446
+
1447
+ def _create_dataframe(
1448
+ self,
1449
+ data: Union[RDD[Any], Iterable[Any]],
1450
+ schema: Optional[Union[DataType, List[str]]],
1451
+ samplingRatio: Optional[float],
1452
+ verifySchema: bool,
1453
+ ) -> DataFrame:
1454
+ if isinstance(schema, StructType):
1455
+ verify_func = _make_type_verifier(schema) if verifySchema else lambda _: True
1456
+
1457
+ @no_type_check
1458
+ def prepare(obj):
1459
+ verify_func(obj)
1460
+ return obj
1461
+
1462
+ elif isinstance(schema, DataType):
1463
+ dataType = schema
1464
+ schema = StructType().add("value", schema)
1465
+
1466
+ verify_func = (
1467
+ _make_type_verifier(dataType, name="field value")
1468
+ if verifySchema
1469
+ else lambda _: True
1470
+ )
1471
+
1472
+ @no_type_check
1473
+ def prepare(obj):
1474
+ verify_func(obj)
1475
+ return (obj,)
1476
+
1477
+ else:
1478
+
1479
+ def prepare(obj: Any) -> Any:
1480
+ return obj
1481
+
1482
+ if isinstance(data, RDD):
1483
+ rdd, struct = self._createFromRDD(data.map(prepare), schema, samplingRatio)
1484
+ else:
1485
+ rdd, struct = self._createFromLocal(map(prepare, data), schema)
1486
+ assert self._jvm is not None
1487
+ jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
1488
+ jdf = self._jsparkSession.applySchemaToPythonRDD(jrdd.rdd(), struct.json())
1489
+ df = DataFrame(jdf, self)
1490
+ df._schema = struct
1491
+ return df
1492
+
1493
+ def sql(
1494
+ self, sqlQuery: str, args: Optional[Union[Dict[str, Any], List]] = None, **kwargs: Any
1495
+ ) -> DataFrame:
1496
+ """Returns a :class:`DataFrame` representing the result of the given query.
1497
+ When ``kwargs`` is specified, this method formats the given string by using the Python
1498
+ standard formatter. The method binds named parameters to SQL literals or
1499
+ positional parameters from `args`. It doesn't support named and positional parameters
1500
+ in the same SQL query.
1501
+
1502
+ .. versionadded:: 2.0.0
1503
+
1504
+ .. versionchanged:: 3.4.0
1505
+ Supports Spark Connect and parameterized SQL.
1506
+
1507
+ .. versionchanged:: 3.5.0
1508
+ Added positional parameters.
1509
+
1510
+ Parameters
1511
+ ----------
1512
+ sqlQuery : str
1513
+ SQL query string.
1514
+ args : dict or list
1515
+ A dictionary of parameter names to Python objects or a list of Python objects
1516
+ that can be converted to SQL literal expressions. See
1517
+ <a href="https://spark.apache.org/docs/latest/sql-ref-datatypes.html">
1518
+ Supported Data Types</a> for supported value types in Python.
1519
+ For example, dictionary keys: "rank", "name", "birthdate";
1520
+ dictionary or list values: 1, "Steven", datetime.date(2023, 4, 2).
1521
+ A value can be also a `Column` of literal expression, in that case it is taken as is.
1522
+
1523
+ .. versionadded:: 3.4.0
1524
+
1525
+ kwargs : dict
1526
+ Other variables that the user wants to set that can be referenced in the query
1527
+
1528
+ .. versionchanged:: 3.3.0
1529
+ Added optional argument ``kwargs`` to specify the mapping of variables in the query.
1530
+ This feature is experimental and unstable.
1531
+
1532
+ Returns
1533
+ -------
1534
+ :class:`DataFrame`
1535
+
1536
+ Examples
1537
+ --------
1538
+ Executing a SQL query.
1539
+
1540
+ >>> spark.sql("SELECT * FROM range(10) where id > 7").show()
1541
+ +---+
1542
+ | id|
1543
+ +---+
1544
+ | 8|
1545
+ | 9|
1546
+ +---+
1547
+
1548
+ Executing a SQL query with variables as Python formatter standard.
1549
+
1550
+ >>> spark.sql(
1551
+ ... "SELECT * FROM range(10) WHERE id > {bound1} AND id < {bound2}", bound1=7, bound2=9
1552
+ ... ).show()
1553
+ +---+
1554
+ | id|
1555
+ +---+
1556
+ | 8|
1557
+ +---+
1558
+
1559
+ >>> mydf = spark.range(10)
1560
+ >>> spark.sql(
1561
+ ... "SELECT {col} FROM {mydf} WHERE id IN {x}",
1562
+ ... col=mydf.id, mydf=mydf, x=tuple(range(4))).show()
1563
+ +---+
1564
+ | id|
1565
+ +---+
1566
+ | 0|
1567
+ | 1|
1568
+ | 2|
1569
+ | 3|
1570
+ +---+
1571
+
1572
+ >>> spark.sql('''
1573
+ ... SELECT m1.a, m2.b
1574
+ ... FROM {table1} m1 INNER JOIN {table2} m2
1575
+ ... ON m1.key = m2.key
1576
+ ... ORDER BY m1.a, m2.b''',
1577
+ ... table1=spark.createDataFrame([(1, "a"), (2, "b")], ["a", "key"]),
1578
+ ... table2=spark.createDataFrame([(3, "a"), (4, "b"), (5, "b")], ["b", "key"])).show()
1579
+ +---+---+
1580
+ | a| b|
1581
+ +---+---+
1582
+ | 1| 3|
1583
+ | 2| 4|
1584
+ | 2| 5|
1585
+ +---+---+
1586
+
1587
+ Also, it is possible to query using class:`Column` from :class:`DataFrame`.
1588
+
1589
+ >>> mydf = spark.createDataFrame([(1, 4), (2, 4), (3, 6)], ["A", "B"])
1590
+ >>> spark.sql("SELECT {df.A}, {df[B]} FROM {df}", df=mydf).show()
1591
+ +---+---+
1592
+ | A| B|
1593
+ +---+---+
1594
+ | 1| 4|
1595
+ | 2| 4|
1596
+ | 3| 6|
1597
+ +---+---+
1598
+
1599
+ And substitude named parameters with the `:` prefix by SQL literals.
1600
+
1601
+ >>> spark.sql("SELECT * FROM {df} WHERE {df[B]} > :minB", {"minB" : 5}, df=mydf).show()
1602
+ +---+---+
1603
+ | A| B|
1604
+ +---+---+
1605
+ | 3| 6|
1606
+ +---+---+
1607
+
1608
+ Or positional parameters marked by `?` in the SQL query by SQL literals.
1609
+
1610
+ >>> spark.sql(
1611
+ ... "SELECT * FROM {df} WHERE {df[B]} > ? and ? < {df[A]}",
1612
+ ... args=[5, 2], df=mydf).show()
1613
+ +---+---+
1614
+ | A| B|
1615
+ +---+---+
1616
+ | 3| 6|
1617
+ +---+---+
1618
+ """
1619
+
1620
+ formatter = SQLStringFormatter(self)
1621
+ if len(kwargs) > 0:
1622
+ sqlQuery = formatter.format(sqlQuery, **kwargs)
1623
+ try:
1624
+ if isinstance(args, Dict):
1625
+ litArgs = {k: _to_java_column(lit(v)) for k, v in (args or {}).items()}
1626
+ else:
1627
+ assert self._jvm is not None
1628
+ litArgs = self._jvm.PythonUtils.toArray(
1629
+ [_to_java_column(lit(v)) for v in (args or [])]
1630
+ )
1631
+ return DataFrame(self._jsparkSession.sql(sqlQuery, litArgs), self)
1632
+ finally:
1633
+ if len(kwargs) > 0:
1634
+ formatter.clear()
1635
+
1636
+ def table(self, tableName: str) -> DataFrame:
1637
+ """Returns the specified table as a :class:`DataFrame`.
1638
+
1639
+ .. versionadded:: 2.0.0
1640
+
1641
+ .. versionchanged:: 3.4.0
1642
+ Supports Spark Connect.
1643
+
1644
+ Parameters
1645
+ ----------
1646
+ tableName : str
1647
+ the table name to retrieve.
1648
+
1649
+ Returns
1650
+ -------
1651
+ :class:`DataFrame`
1652
+
1653
+ Examples
1654
+ --------
1655
+ >>> spark.range(5).createOrReplaceTempView("table1")
1656
+ >>> spark.table("table1").sort("id").show()
1657
+ +---+
1658
+ | id|
1659
+ +---+
1660
+ | 0|
1661
+ | 1|
1662
+ | 2|
1663
+ | 3|
1664
+ | 4|
1665
+ +---+
1666
+ """
1667
+ return DataFrame(self._jsparkSession.table(tableName), self)
1668
+
1669
+ @property
1670
+ def read(self) -> DataFrameReader:
1671
+ """
1672
+ Returns a :class:`DataFrameReader` that can be used to read data
1673
+ in as a :class:`DataFrame`.
1674
+
1675
+ .. versionadded:: 2.0.0
1676
+
1677
+ .. versionchanged:: 3.4.0
1678
+ Supports Spark Connect.
1679
+
1680
+ Returns
1681
+ -------
1682
+ :class:`DataFrameReader`
1683
+
1684
+ Examples
1685
+ --------
1686
+ >>> spark.read
1687
+ <...DataFrameReader object ...>
1688
+
1689
+ Write a DataFrame into a JSON file and read it back.
1690
+
1691
+ >>> import tempfile
1692
+ >>> with tempfile.TemporaryDirectory() as d:
1693
+ ... # Write a DataFrame into a JSON file
1694
+ ... spark.createDataFrame(
1695
+ ... [{"age": 100, "name": "Hyukjin Kwon"}]
1696
+ ... ).write.mode("overwrite").format("json").save(d)
1697
+ ...
1698
+ ... # Read the JSON file as a DataFrame.
1699
+ ... spark.read.format('json').load(d).show()
1700
+ +---+------------+
1701
+ |age| name|
1702
+ +---+------------+
1703
+ |100|Hyukjin Kwon|
1704
+ +---+------------+
1705
+ """
1706
+ return DataFrameReader(self)
1707
+
1708
+ @property
1709
+ def readStream(self) -> DataStreamReader:
1710
+ """
1711
+ Returns a :class:`DataStreamReader` that can be used to read data streams
1712
+ as a streaming :class:`DataFrame`.
1713
+
1714
+ .. versionadded:: 2.0.0
1715
+
1716
+ .. versionchanged:: 3.5.0
1717
+ Supports Spark Connect.
1718
+
1719
+ Notes
1720
+ -----
1721
+ This API is evolving.
1722
+
1723
+ Returns
1724
+ -------
1725
+ :class:`DataStreamReader`
1726
+
1727
+ Examples
1728
+ --------
1729
+ >>> spark.readStream
1730
+ <pyspark...DataStreamReader object ...>
1731
+
1732
+ The example below uses Rate source that generates rows continuously.
1733
+ After that, we operate a modulo by 3, and then write the stream out to the console.
1734
+ The streaming query stops in 3 seconds.
1735
+
1736
+ >>> import time
1737
+ >>> df = spark.readStream.format("rate").load()
1738
+ >>> df = df.selectExpr("value % 3 as v")
1739
+ >>> q = df.writeStream.format("console").start()
1740
+ >>> time.sleep(3)
1741
+ >>> q.stop()
1742
+ """
1743
+ return DataStreamReader(self)
1744
+
1745
+ @property
1746
+ def streams(self) -> "StreamingQueryManager":
1747
+ """Returns a :class:`StreamingQueryManager` that allows managing all the
1748
+ :class:`StreamingQuery` instances active on `this` context.
1749
+
1750
+ .. versionadded:: 2.0.0
1751
+
1752
+ .. versionchanged:: 3.5.0
1753
+ Supports Spark Connect.
1754
+
1755
+ Notes
1756
+ -----
1757
+ This API is evolving.
1758
+
1759
+ Returns
1760
+ -------
1761
+ :class:`StreamingQueryManager`
1762
+
1763
+ Examples
1764
+ --------
1765
+ >>> spark.streams
1766
+ <pyspark...StreamingQueryManager object ...>
1767
+
1768
+ Get the list of active streaming queries
1769
+
1770
+ >>> sq = spark.readStream.format(
1771
+ ... "rate").load().writeStream.format('memory').queryName('this_query').start()
1772
+ >>> sqm = spark.streams
1773
+ >>> [q.name for q in sqm.active]
1774
+ ['this_query']
1775
+ >>> sq.stop()
1776
+ """
1777
+ from pyspark.sql.streaming import StreamingQueryManager
1778
+
1779
+ return StreamingQueryManager(self._jsparkSession.streams())
1780
+
1781
+ def stop(self) -> None:
1782
+ """
1783
+ Stop the underlying :class:`SparkContext`.
1784
+
1785
+ .. versionadded:: 2.0.0
1786
+
1787
+ .. versionchanged:: 3.4.0
1788
+ Supports Spark Connect.
1789
+
1790
+ Examples
1791
+ --------
1792
+ >>> spark.stop() # doctest: +SKIP
1793
+ """
1794
+ from pyspark.sql.context import SQLContext
1795
+
1796
+ self._sc.stop()
1797
+ # We should clean the default session up. See SPARK-23228.
1798
+ assert self._jvm is not None
1799
+ self._jvm.SparkSession.clearDefaultSession()
1800
+ self._jvm.SparkSession.clearActiveSession()
1801
+ SparkSession._instantiatedSession = None
1802
+ SparkSession._activeSession = None
1803
+ SQLContext._instantiatedContext = None
1804
+
1805
+ def __enter__(self) -> "SparkSession":
1806
+ """
1807
+ Enable 'with SparkSession.builder.(...).getOrCreate() as session: app' syntax.
1808
+
1809
+ .. versionadded:: 2.0.0
1810
+
1811
+ Examples
1812
+ --------
1813
+ >>> with SparkSession.builder.master("local").getOrCreate() as session:
1814
+ ... session.range(5).show() # doctest: +SKIP
1815
+ +---+
1816
+ | id|
1817
+ +---+
1818
+ | 0|
1819
+ | 1|
1820
+ | 2|
1821
+ | 3|
1822
+ | 4|
1823
+ +---+
1824
+ """
1825
+ return self
1826
+
1827
+ def __exit__(
1828
+ self,
1829
+ exc_type: Optional[Type[BaseException]],
1830
+ exc_val: Optional[BaseException],
1831
+ exc_tb: Optional[TracebackType],
1832
+ ) -> None:
1833
+ """
1834
+ Enable 'with SparkSession.builder.(...).getOrCreate() as session: app' syntax.
1835
+
1836
+ Specifically stop the SparkSession on exit of the with block.
1837
+
1838
+ .. versionadded:: 2.0.0
1839
+
1840
+ Examples
1841
+ --------
1842
+ >>> with SparkSession.builder.master("local").getOrCreate() as session:
1843
+ ... session.range(5).show() # doctest: +SKIP
1844
+ +---+
1845
+ | id|
1846
+ +---+
1847
+ | 0|
1848
+ | 1|
1849
+ | 2|
1850
+ | 3|
1851
+ | 4|
1852
+ +---+
1853
+ """
1854
+ self.stop()
1855
+
1856
+ # SparkConnect-specific API
1857
+ @property
1858
+ def client(self) -> "SparkConnectClient":
1859
+ """
1860
+ Gives access to the Spark Connect client. In normal cases this is not necessary to be used
1861
+ and only relevant for testing.
1862
+
1863
+ .. versionadded:: 3.4.0
1864
+
1865
+ Returns
1866
+ -------
1867
+ :class:`SparkConnectClient`
1868
+
1869
+ Notes
1870
+ -----
1871
+ This API is unstable, and a developer API. It returns non-API instance
1872
+ :class:`SparkConnectClient`.
1873
+ This is an API dedicated to Spark Connect client only. With regular Spark Session, it throws
1874
+ an exception.
1875
+ """
1876
+ raise RuntimeError(
1877
+ "SparkSession.client is only supported with Spark Connect; "
1878
+ "however, the current Spark session does not use Spark Connect."
1879
+ )
1880
+
1881
+ def addArtifacts(
1882
+ self, *path: str, pyfile: bool = False, archive: bool = False, file: bool = False
1883
+ ) -> None:
1884
+ """
1885
+ Add artifact(s) to the client session. Currently only local files are supported.
1886
+
1887
+ .. versionadded:: 3.5.0
1888
+
1889
+ Parameters
1890
+ ----------
1891
+ *path : tuple of str
1892
+ Artifact's URIs to add.
1893
+ pyfile : bool
1894
+ Whether to add them as Python dependencies such as .py, .egg, .zip or .jar files.
1895
+ The pyfiles are directly inserted into the path when executing Python functions
1896
+ in executors.
1897
+ archive : bool
1898
+ Whether to add them as archives such as .zip, .jar, .tar.gz, .tgz, or .tar files.
1899
+ The archives are unpacked on the executor side automatically.
1900
+ file : bool
1901
+ Add a file to be downloaded with this Spark job on every node.
1902
+ The ``path`` passed can only be a local file for now.
1903
+
1904
+ Notes
1905
+ -----
1906
+ This is an API dedicated to Spark Connect client only. With regular Spark Session, it throws
1907
+ an exception.
1908
+ """
1909
+ raise RuntimeError(
1910
+ "SparkSession.addArtifact(s) is only supported with Spark Connect; "
1911
+ "however, the current Spark session does not use Spark Connect."
1912
+ )
1913
+
1914
+ addArtifact = addArtifacts
1915
+
1916
+ def copyFromLocalToFs(self, local_path: str, dest_path: str) -> None:
1917
+ """
1918
+ Copy file from local to cloud storage file system.
1919
+ If the file already exits in destination path, old file is overwritten.
1920
+
1921
+ .. versionadded:: 3.5.0
1922
+
1923
+ Parameters
1924
+ ----------
1925
+ local_path: str
1926
+ Path to a local file. Directories are not supported.
1927
+ The path can be either an absolute path or a relative path.
1928
+ dest_path: str
1929
+ The cloud storage path to the destination the file will
1930
+ be copied to.
1931
+ The path must be an an absolute path.
1932
+
1933
+ Notes
1934
+ -----
1935
+ This API is a developer API.
1936
+ Also, this is an API dedicated to Spark Connect client only. With regular
1937
+ Spark Session, it throws an exception.
1938
+ """
1939
+ raise RuntimeError(
1940
+ "SparkSession.copyFromLocalToFs is only supported with Spark Connect; "
1941
+ "however, the current Spark session does not use Spark Connect."
1942
+ )
1943
+
1944
+ def interruptAll(self) -> List[str]:
1945
+ """
1946
+ Interrupt all operations of this session currently running on the connected server.
1947
+
1948
+ .. versionadded:: 3.5.0
1949
+
1950
+ Returns
1951
+ -------
1952
+ list of str
1953
+ List of operationIds of interrupted operations.
1954
+
1955
+ Notes
1956
+ -----
1957
+ There is still a possibility of operation finishing just as it is interrupted.
1958
+ """
1959
+ raise RuntimeError(
1960
+ "SparkSession.interruptAll is only supported with Spark Connect; "
1961
+ "however, the current Spark session does not use Spark Connect."
1962
+ )
1963
+
1964
+ def interruptTag(self, tag: str) -> List[str]:
1965
+ """
1966
+ Interrupt all operations of this session with the given operation tag.
1967
+
1968
+ .. versionadded:: 3.5.0
1969
+
1970
+ Returns
1971
+ -------
1972
+ list of str
1973
+ List of operationIds of interrupted operations.
1974
+
1975
+ Notes
1976
+ -----
1977
+ There is still a possibility of operation finishing just as it is interrupted.
1978
+ """
1979
+ raise RuntimeError(
1980
+ "SparkSession.interruptTag is only supported with Spark Connect; "
1981
+ "however, the current Spark session does not use Spark Connect."
1982
+ )
1983
+
1984
+ def interruptOperation(self, op_id: str) -> List[str]:
1985
+ """
1986
+ Interrupt an operation of this session with the given operationId.
1987
+
1988
+ .. versionadded:: 3.5.0
1989
+
1990
+ Returns
1991
+ -------
1992
+ list of str
1993
+ List of operationIds of interrupted operations.
1994
+
1995
+ Notes
1996
+ -----
1997
+ There is still a possibility of operation finishing just as it is interrupted.
1998
+ """
1999
+ raise RuntimeError(
2000
+ "SparkSession.interruptOperation is only supported with Spark Connect; "
2001
+ "however, the current Spark session does not use Spark Connect."
2002
+ )
2003
+
2004
+ def addTag(self, tag: str) -> None:
2005
+ """
2006
+ Add a tag to be assigned to all the operations started by this thread in this session.
2007
+
2008
+ .. versionadded:: 3.5.0
2009
+
2010
+ Parameters
2011
+ ----------
2012
+ tag : list of str
2013
+ The tag to be added. Cannot contain ',' (comma) character or be an empty string.
2014
+ """
2015
+ raise RuntimeError(
2016
+ "SparkSession.addTag is only supported with Spark Connect; "
2017
+ "however, the current Spark session does not use Spark Connect."
2018
+ )
2019
+
2020
+ def removeTag(self, tag: str) -> None:
2021
+ """
2022
+ Remove a tag previously added to be assigned to all the operations started by this thread in
2023
+ this session. Noop if such a tag was not added earlier.
2024
+
2025
+ .. versionadded:: 3.5.0
2026
+
2027
+ Parameters
2028
+ ----------
2029
+ tag : list of str
2030
+ The tag to be removed. Cannot contain ',' (comma) character or be an empty string.
2031
+ """
2032
+ raise RuntimeError(
2033
+ "SparkSession.removeTag is only supported with Spark Connect; "
2034
+ "however, the current Spark session does not use Spark Connect."
2035
+ )
2036
+
2037
+ def getTags(self) -> Set[str]:
2038
+ """
2039
+ Get the tags that are currently set to be assigned to all the operations started by this
2040
+ thread.
2041
+
2042
+ .. versionadded:: 3.5.0
2043
+
2044
+ Returns
2045
+ -------
2046
+ set of str
2047
+ Set of tags of interrupted operations.
2048
+ """
2049
+ raise RuntimeError(
2050
+ "SparkSession.getTags is only supported with Spark Connect; "
2051
+ "however, the current Spark session does not use Spark Connect."
2052
+ )
2053
+
2054
+ def clearTags(self) -> None:
2055
+ """
2056
+ Clear the current thread's operation tags.
2057
+
2058
+ .. versionadded:: 3.5.0
2059
+ """
2060
+ raise RuntimeError(
2061
+ "SparkSession.clearTags is only supported with Spark Connect; "
2062
+ "however, the current Spark session does not use Spark Connect."
2063
+ )
2064
+
2065
+
2066
+ def _test() -> None:
2067
+ import os
2068
+ import doctest
2069
+ import pyspark.sql.session
2070
+
2071
+ os.chdir(os.environ["SPARK_HOME"])
2072
+
2073
+ globs = pyspark.sql.session.__dict__.copy()
2074
+ globs["spark"] = (
2075
+ SparkSession.builder.master("local[4]").appName("sql.session tests").getOrCreate()
2076
+ )
2077
+ (failure_count, test_count) = doctest.testmod(
2078
+ pyspark.sql.session,
2079
+ globs=globs,
2080
+ optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
2081
+ )
2082
+ globs["spark"].stop()
2083
+ if failure_count:
2084
+ sys.exit(-1)
2085
+
2086
+
2087
+ if __name__ == "__main__":
2088
+ _test()