snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879):
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1349 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ from contextlib import redirect_stdout
19
+ import datetime
20
+ from inspect import getmembers, isfunction
21
+ import io
22
+ from itertools import chain
23
+ import math
24
+ import re
25
+ import unittest
26
+
27
+ from py4j.protocol import Py4JJavaError
28
+
29
+ from pyspark.errors import PySparkTypeError, PySparkValueError
30
+ from pyspark.sql import Row, Window, functions as F, types
31
+ from pyspark.sql.column import Column
32
+ from pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils
33
+ from pyspark.testing.utils import have_numpy
34
+
35
+
36
+ class FunctionsTestsMixin:
37
def test_function_parity(self):
    # Compares the functions exposed by pyspark.sql.functions with the members of
    # the Scala/Java DataFrame API object org.apache.spark.sql.functions.
    #
    # NOTE FOR DEVELOPERS:
    # If this test fails one of the following needs to happen
    # * A function added to org.apache.spark.sql.functions must either be added to
    #   pyspark.sql.functions or listed in expected_missing_in_py below.
    # * A function added to pyspark.sql.functions that already existed in
    #   org.apache.spark.sql.functions must be removed from expected_missing_in_py.
    #   If its Python name differs from the JVM name, add it to py_equiv_jvm.
    # * Otherwise the exclusion list jvm_excluded_fn most likely needs updating.

    jvm_names = {member_name for member_name, _ in getmembers(self.sc._jvm.functions)}
    python_names = {
        member_name
        for member_name, _ in getmembers(F, isfunction)
        if member_name[0] != "_"
    }

    # JVM-side functions that are not expected in Python because they are
    # deprecated, irrelevant to Python, or have equivalents.
    jvm_excluded_fn = (
        "callUDF",  # deprecated, use call_udf
        "typedlit",  # Scala only
        "typedLit",  # Scala only
        "monotonicallyIncreasingId",  # deprecated, use monotonically_increasing_id
        "not",  # equivalent to python ~expression
        "any",  # equivalent to python ~some
        "len",  # equivalent to python ~length
        "udaf",  # used for creating UDAF's which are not supported in PySpark
        "random",  # namespace conflict with python built-in module
        "uuid",  # namespace conflict with python built-in module
        "chr",  # namespace conflict with python built-in function
    )
    jvm_names.difference_update(jvm_excluded_fn)

    # Python name -> JVM name, for functions that are spelled differently in pyspark.
    py_equiv_jvm = {"create_map": "map"}
    for python_name, jvm_name in py_equiv_jvm.items():
        if python_name not in python_names:
            continue
        python_names.remove(python_name)
        python_names.add(jvm_name)

    missing_in_py = jvm_names - python_names

    # Functions expected to be absent from Python until they are added to pyspark.
    expected_missing_in_py = set()

    self.assertEqual(
        expected_missing_in_py, missing_in_py, "Missing functions in pyspark not as expected"
    )
89
+
90
def test_explode(self):
    # Exercises explode, explode_outer and posexplode_outer on an array column
    # and a map column, including empty and None values.
    rows = [
        Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"}),
        Row(a=1, intlist=[], mapfield={}),
        Row(a=1, intlist=None, mapfield=None),
    ]
    frame = self.spark.createDataFrame(rows)

    exploded_ints = frame.select(F.explode(frame.intlist).alias("a")).select("a").collect()
    self.assertEqual(exploded_ints[0][0], 1)
    self.assertEqual(exploded_ints[1][0], 2)
    self.assertEqual(exploded_ints[2][0], 3)

    exploded_map = (
        frame.select(F.explode(frame.mapfield).alias("a", "b")).select("a", "b").collect()
    )
    first_entry = exploded_map[0]
    self.assertEqual(first_entry[0], "a")
    self.assertEqual(first_entry[1], "b")

    pos_ints = list(map(tuple, frame.select(F.posexplode_outer("intlist")).collect()))
    self.assertEqual(pos_ints, [(0, 1), (1, 2), (2, 3), (None, None), (None, None)])

    pos_map = list(map(tuple, frame.select(F.posexplode_outer("mapfield")).collect()))
    self.assertEqual(pos_map, [(0, "a", "b"), (None, None, None), (None, None, None)])

    outer_ints = [record[0] for record in frame.select(F.explode_outer("intlist")).collect()]
    self.assertEqual(outer_ints, [1, 2, 3, None, None])

    outer_map = list(map(tuple, frame.select(F.explode_outer("mapfield")).collect()))
    self.assertEqual(outer_map, [("a", "b"), (None, None), (None, None)])
118
+
119
def test_inline(self):
    # Exercises inline and inline_outer on a column holding arrays of structs,
    # including a None struct element and an empty array.
    rows = [
        Row(structlist=[Row(b=1, c=2), Row(b=3, c=4)]),
        Row(structlist=[Row(b=None, c=5), None]),
        Row(structlist=[]),
    ]
    frame = self.spark.createDataFrame(rows)

    inlined = list(map(tuple, frame.select(F.inline(frame.structlist)).collect()))
    self.assertEqual(inlined, [(1, 2), (3, 4), (None, 5), (None, None)])

    inlined_outer = list(map(tuple, frame.select(F.inline_outer(frame.structlist)).collect()))
    self.assertEqual(inlined_outer, [(1, 2), (3, 4), (None, 5), (None, None), (None, None)])
132
+
133
def test_basic_functions(self):
    # Smoke test: read JSON from an RDD, run basic actions, toggle caching, and
    # query the data back through a temporary view.
    json_lines = self.sc.parallelize(['{"foo":"bar"}', '{"foo":"baz"}'])
    frame = self.spark.read.json(json_lines)
    frame.count()
    frame.collect()
    frame.schema

    # cache and checkpoint
    self.assertFalse(frame.is_cached)
    frame.persist()
    frame.unpersist(True)
    frame.cache()
    self.assertTrue(frame.is_cached)
    self.assertEqual(2, frame.count())

    with self.tempView("temp"):
        frame.createOrReplaceTempView("temp")
        frame = self.spark.sql("select foo from temp")
        frame.count()
        frame.collect()
153
+
154
def test_corr(self):
    # Correlation between i and sqrt(i) for i in 0..9 must match the reference
    # value to within 1e-6.
    rows = [Row(a=i, b=math.sqrt(i)) for i in range(10)]
    frame = self.spark.createDataFrame(rows)
    deviation = abs(frame.stat.corr("a", "b") - 0.95734012)
    self.assertTrue(deviation < 1e-6)
158
+
159
def test_sampleby(self):
    # Checks the size of a seeded stratified sample, then the errors raised for
    # a non-column `col`, a non-dict `fractions`, and a None key in `fractions`.
    frame = self.spark.createDataFrame([Row(a=i, b=(i % 3)) for i in range(100)])
    sampled = frame.stat.sampleBy("b", fractions={0: 0.5, 1: 0.5}, seed=0)
    sample_size = sampled.count()
    self.assertTrue(35 <= sample_size <= 36)

    # `col` must be a Column or a string.
    with self.assertRaises(PySparkTypeError) as raised:
        frame.sampleBy(10, fractions={0: 0.5, 1: 0.5})

    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters={"arg_name": "col", "arg_type": "int"},
    )

    # `fractions` must be a dict.
    with self.assertRaises(PySparkTypeError) as raised:
        frame.sampleBy("b", fractions=[0.5, 0.5])

    self.check_error(
        exception=raised.exception,
        error_class="NOT_DICT",
        message_parameters={"arg_name": "fractions", "arg_type": "list"},
    )

    # Keys of `fractions` must be float, int or str.
    with self.assertRaises(PySparkTypeError) as raised:
        frame.sampleBy("b", fractions={None: 0.5, 1: 0.5})

    expected_parameters = {
        "arg_name": "fractions",
        "arg_type": "dict",
        "allowed_types": "float, int, str",
        "return_type": "NoneType",
    }
    self.check_error(
        exception=raised.exception,
        error_class="DISALLOWED_TYPE_FOR_CONTAINER",
        message_parameters=expected_parameters,
    )
195
+
196
def test_cov(self):
    # Covariance of i and 2*i for i in 0..9 must equal 55/3 within 1e-6; non-string
    # column arguments must raise NOT_STR.
    frame = self.spark.createDataFrame([Row(a=i, b=2 * i) for i in range(10)])
    deviation = abs(frame.stat.cov("a", "b") - 55.0 / 3)
    self.assertTrue(deviation < 1e-6)

    # First column name is not a string.
    with self.assertRaises(PySparkTypeError) as raised:
        frame.stat.cov(10, "b")

    self.check_error(
        exception=raised.exception,
        error_class="NOT_STR",
        message_parameters={"arg_name": "col1", "arg_type": "int"},
    )

    # Second column name is not a string.
    with self.assertRaises(PySparkTypeError) as raised:
        frame.stat.cov("a", True)

    self.check_error(
        exception=raised.exception,
        error_class="NOT_STR",
        message_parameters={"arg_name": "col2", "arg_type": "bool"},
    )
218
+
219
def test_crosstab(self):
    # Builds six rows (a = i % 3, b = i % 2 for i in 1..6) in which every (a, b)
    # combination occurs exactly once, so each crosstab row, sorted by its first
    # column, must be labelled "0", "1", "2" and hold a count of 1 in both columns.
    df = self.spark.createDataFrame([Row(a=i % 3, b=i % 2) for i in range(1, 7)])
    ct = sorted(df.stat.crosstab("a", "b").collect(), key=lambda x: x[0])
    for i, row in enumerate(ct):
        self.assertEqual(row[0], str(i))
        # assertEqual, not assertTrue: assertTrue(value, 1) would treat 1 as the
        # failure message and pass for any truthy count.
        self.assertEqual(row[1], 1)
        self.assertEqual(row[2], 1)
227
+
228
def test_math_functions(self):
    # Compares cos, sin, pow and hypot from pyspark.sql.functions with the `math`
    # module, passing columns as Column objects, names and Python literals.
    frame = self.spark.createDataFrame([Row(a=i, b=2 * i) for i in range(10)])

    def expected(fn):
        # Reference values of `fn` over the same 0..9 range stored in column "a".
        return [fn(i) for i in range(10)]

    def power_of_double(i):
        return math.pow(i, 2 * i)

    def squared(i):
        return math.pow(i, 2)

    def hypot_with_double(i):
        return math.hypot(i, 2 * i)

    def hypot_with_two(i):
        return math.hypot(i, 2)

    close = SQLTestUtils.assert_close

    close(expected(math.cos), frame.select(F.cos(frame.a)).collect())
    close(expected(math.cos), frame.select(F.cos("a")).collect())
    close(expected(math.sin), frame.select(F.sin(frame.a)).collect())
    close(expected(math.sin), frame.select(F.sin(frame["a"])).collect())
    close(expected(power_of_double), frame.select(F.pow(frame.a, frame.b)).collect())
    close(expected(squared), frame.select(F.pow(frame.a, 2)).collect())
    close(expected(squared), frame.select(F.pow(frame.a, 2.0)).collect())
    close(expected(hypot_with_double), frame.select(F.hypot(frame.a, frame.b)).collect())
    close(expected(hypot_with_double), frame.select(F.hypot("a", "b")).collect())
    close(expected(hypot_with_two), frame.select(F.hypot("a", 2)).collect())
    close(expected(hypot_with_two), frame.select(F.hypot(frame.a, 2)).collect())
262
+
263
def test_inverse_trig_functions(self):
    # Applies each hyperbolic function followed by its inverse and checks that the
    # round trip gives back the input (or its absolute value for cosh, which is
    # symmetrical about the y axis).
    frame = self.spark.createDataFrame([Row(a=i * 0.2, b=i * -0.2) for i in range(10)])

    def check(trig, inv, y_axis_symmetrical):
        # Column "a" holds the non-negative inputs.
        SQLTestUtils.assert_close(
            [n * 0.2 for n in range(10)],
            frame.select(inv(trig(frame.a))).collect(),
        )
        # Column "b" holds the negated inputs; a symmetrical function loses the sign.
        if y_axis_symmetrical:
            expected_for_b = [n * 0.2 for n in range(10)]
        else:
            expected_for_b = [n * -0.2 for n in range(10)]
        SQLTestUtils.assert_close(
            expected_for_b,
            frame.select(inv(trig(frame.b))).collect(),
        )

    check(F.cosh, F.acosh, y_axis_symmetrical=True)
    check(F.sinh, F.asinh, y_axis_symmetrical=False)
    check(F.tanh, F.atanh, y_axis_symmetrical=False)
285
+
286
def test_reciprocal_trig_functions(self):
    # SPARK-36683: Tests for reciprocal trig functions (SEC, CSC and COT)
    angles = [
        0.0,
        math.pi / 6,
        math.pi / 4,
        math.pi / 3,
        math.pi / 2,
        math.pi,
        3 * math.pi / 2,
        2 * math.pi,
    ]

    frame = self.spark.createDataFrame(angles, types.DoubleType())

    def to_reciprocal_trig(func):
        # 1 / func(angle) for every angle, with infinity where func(angle) is 0.
        reciprocals = []
        for angle in angles:
            value = func(angle)
            if value != 0:
                reciprocals.append(1.0 / value)
            else:
                reciprocals.append(math.inf)
        return reciprocals

    SQLTestUtils.assert_close(
        to_reciprocal_trig(math.cos), frame.select(F.sec(frame.value)).collect()
    )
    SQLTestUtils.assert_close(
        to_reciprocal_trig(math.sin), frame.select(F.csc(frame.value)).collect()
    )
    SQLTestUtils.assert_close(
        to_reciprocal_trig(math.tan), frame.select(F.cot(frame.value)).collect()
    )
313
+
314
def test_rand_functions(self):
    # Checks the value ranges of rand() and randn(5), and that a seed of 0 gives
    # reproducible results for both functions.
    df = self.spark.createDataFrame([Row(key=i, value=str(i)) for i in range(100)])

    # unittest assertions are used instead of bare `assert` statements so the
    # range checks still run when Python is started with -O.
    rnd = df.select("key", F.rand()).collect()
    for row in rnd:
        self.assertTrue(0.0 <= row[1] <= 1.0, "got: %s" % row[1])
    rndn = df.select("key", F.randn(5)).collect()
    for row in rndn:
        self.assertTrue(-4.0 <= row[1] <= 4.0, "got: %s" % row[1])

    # If the specified seed is 0, we should use it.
    # https://issues.apache.org/jira/browse/SPARK-9691
    rnd1 = df.select("key", F.rand(0)).collect()
    rnd2 = df.select("key", F.rand(0)).collect()
    self.assertEqual(sorted(rnd1), sorted(rnd2))

    rndn1 = df.select("key", F.randn(0)).collect()
    rndn2 = df.select("key", F.randn(0)).collect()
    self.assertEqual(sorted(rndn1), sorted(rndn2))
333
+
334
def test_string_functions(self):
    # Checks that Column.substr rejects mixed int/Column arguments, and that each
    # listed string function gives the same result for a column name and a Column.
    string_functions = (
        "upper",
        "lower",
        "ascii",
        "base64",
        "unbase64",
        "ltrim",
        "rtrim",
        "trim",
    )

    frame = self.spark.createDataFrame([["nick"]], schema=["name"])
    with self.assertRaises(PySparkTypeError) as raised:
        frame.select(F.col("name").substr(0, F.lit(1)))

    expected_parameters = {
        "arg_name1": "startPos",
        "arg_name2": "length",
        "arg_type1": "int",
        "arg_type2": "Column",
    }
    self.check_error(
        exception=raised.exception,
        error_class="NOT_SAME_TYPE",
        message_parameters=expected_parameters,
    )

    for function_name in string_functions:
        function = getattr(F, function_name)
        from_name = frame.select(function("name")).first()[0]
        from_column = frame.select(function(F.col("name"))).first()[0]
        self.assertEqual(from_name, from_column)
366
+
367
def test_octet_length_function(self):
    """``F.octet_length`` is expected to return 3 for "cat" and 4 for U+1F408."""
    # SPARK-36751: add octet length api for python
    samples = [("cat",), ("\U0001F408",)]
    df = self.spark.createDataFrame(samples, ["cat"])
    lengths = df.select(F.octet_length("cat")).collect()
    self.assertEqual([Row(3), Row(4)], lengths)
372
+
373
def test_bit_length_function(self):
    """``F.bit_length`` is expected to return 24 for "cat" and 32 for U+1F408."""
    # SPARK-36751: add bit length api for python
    samples = [("cat",), ("\U0001F408",)]
    df = self.spark.createDataFrame(samples, ["cat"])
    lengths = df.select(F.bit_length("cat")).collect()
    self.assertEqual([Row(24), Row(32)], lengths)
378
+
379
def test_array_contains_function(self):
    """``F.array_contains`` finds "1" in a populated array and not in an empty one."""
    arrays = [(["1", "2", "3"],), ([],)]
    df = self.spark.createDataFrame(arrays, ["data"])
    contains_one = F.array_contains(df.data, "1").alias("b")
    found = df.select(contains_one).collect()
    self.assertEqual([Row(b=True), Row(b=False)], found)
383
+
384
def test_levenshtein_function(self):
    """Check ``F.levenshtein`` with and without the optional threshold argument."""
    df = self.spark.createDataFrame([("kitten", "sitting")], ["l", "r"])

    plain_distance = F.levenshtein(df.l, df.r).alias("b")
    self.assertEqual([Row(b=3)], df.select(plain_distance).collect())

    # With a threshold of 2 the expected result is -1.
    capped_distance = F.levenshtein(df.l, df.r, 2).alias("b")
    self.assertEqual([Row(b=-1)], df.select(capped_distance).collect())
390
+
391
def test_between_function(self):
    """``Column.between`` keeps only the rows where ``b <= a <= c`` holds."""
    rows = [Row(a=1, b=2, c=3), Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)]
    df = self.spark.createDataFrame(rows)
    kept = df.filter(df.a.between(df.b, df.c)).collect()
    self.assertEqual([Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)], kept)
398
+
399
def test_dayofweek(self):
    """``F.dayofweek`` is expected to return 2 for 2017-11-06."""
    moment = datetime.datetime(2017, 11, 6)
    df = self.spark.createDataFrame([Row(date=moment)])
    first_row = df.select(F.dayofweek(df.date)).first()
    self.assertEqual(first_row[0], 2)
404
+
405
+ # Test added for SPARK-37738; change Python API to accept both col & int as input
406
def test_date_add_function(self):
    """``F.date_add`` is called with a Column, a column name and an int day count."""
    start = datetime.date(2021, 12, 27)

    # Note; number var in Python gets converted to LongType column;
    # this is not supported by the function, so cast to Integer explicitly
    df = self.spark.createDataFrame([Row(date=start, add=2)], "date date, add integer")

    comparisons = [
        F.date_add(df.date, df.add) == datetime.date(2021, 12, 29),
        F.date_add(df.date, "add") == datetime.date(2021, 12, 29),
        F.date_add(df.date, 3) == datetime.date(2021, 12, 30),
    ]
    outcome = df.select(*comparisons).first()
    self.assertTrue(all(outcome))
422
+
423
+ # Test added for SPARK-37738; change Python API to accept both col & int as input
424
def test_date_sub_function(self):
    """``F.date_sub`` is called with a Column, a column name and an int day count."""
    start = datetime.date(2021, 12, 27)

    # Note; number var in Python gets converted to LongType column;
    # this is not supported by the function, so cast to Integer explicitly
    df = self.spark.createDataFrame([Row(date=start, sub=2)], "date date, sub integer")

    comparisons = [
        F.date_sub(df.date, df.sub) == datetime.date(2021, 12, 25),
        F.date_sub(df.date, "sub") == datetime.date(2021, 12, 25),
        F.date_sub(df.date, 3) == datetime.date(2021, 12, 24),
    ]
    outcome = df.select(*comparisons).first()
    self.assertTrue(all(outcome))
440
+
441
+ # Test added for SPARK-37738; change Python API to accept both col & int as input
442
def test_add_months_function(self):
    """``F.add_months`` is called with a Column, a column name and an int month count."""
    start = datetime.date(2021, 12, 27)

    # Note; number in Python gets converted to LongType column;
    # this is not supported by the function, so cast to Integer explicitly
    df = self.spark.createDataFrame([Row(date=start, add=2)], "date date, add integer")

    comparisons = [
        F.add_months(df.date, df.add) == datetime.date(2022, 2, 27),
        F.add_months(df.date, "add") == datetime.date(2022, 2, 27),
        F.add_months(df.date, 3) == datetime.date(2022, 3, 27),
    ]
    outcome = df.select(*comparisons).first()
    self.assertTrue(all(outcome))
458
+
459
def test_make_date(self):
    """``F.make_date`` builds the same date from Column objects and from column names."""
    # SPARK-36554: expose make_date expression
    df = self.spark.createDataFrame([(2020, 6, 26)], ["Y", "M", "D"])
    wanted = datetime.date(2020, 6, 26)

    from_columns = df.select(F.make_date(df.Y, df.M, df.D)).first()
    self.assertEqual(from_columns[0], wanted)

    from_names = df.select(F.make_date("Y", "M", "D")).first()
    self.assertEqual(from_names[0], wanted)
466
+
467
def test_expr(self):
    """``F.expr("length(a)")`` yields a column named ``length(a)`` holding the string length."""
    df = self.spark.createDataFrame([Row(a="length string", b=75)])
    first_row = df.select(F.expr("length(a)")).collect()[0]
    as_mapping = first_row.asDict()
    self.assertEqual(13, as_mapping["length(a)"])
472
+
473
+ # add test for SPARK-10577 (test broadcast join hint)
474
def test_functions_broadcast(self):
    """Explain plans that use ``F.broadcast`` and look for "Broadcast" in the printed text."""
    left = self.spark.createDataFrame([(1, "1"), (2, "2")], ("key", "value"))
    right = self.spark.createDataFrame([(1, "1"), (2, "2")], ("key", "value"))

    # equijoin - should be converted into broadcast join
    with io.StringIO() as captured, redirect_stdout(captured):
        left.join(F.broadcast(right), "key").explain(True)
        # The buffer must be read before the ``with`` block closes it.
        self.assertGreaterEqual(captured.getvalue().count("Broadcast"), 1)

    # no join key (cross join) -- the check below still expects "Broadcast"
    # to show up at least once in the explained plan
    with io.StringIO() as captured, redirect_stdout(captured):
        left.crossJoin(F.broadcast(right)).explain(True)
        self.assertGreaterEqual(captured.getvalue().count("Broadcast"), 1)

    # planner should not crash without a join
    F.broadcast(left).explain(True)
490
+
491
def test_first_last_ignorenulls(self):
    """Check ``F.first``/``F.last`` with the ignore-nulls flag both off and on."""
    ids = self.spark.range(0, 100)
    # Every id divisible by 3 (including the first, 0, and the last, 99) becomes null.
    with_nulls = ids.select(F.when(ids.id % 3 == 0, None).otherwise(ids.id).alias("id"))

    aggregates = [
        F.first(with_nulls.id, False).alias("a"),
        F.first(with_nulls.id, True).alias("b"),
        F.last(with_nulls.id, False).alias("c"),
        F.last(with_nulls.id, True).alias("d"),
    ]
    summary = with_nulls.select(*aggregates)
    self.assertEqual([Row(a=None, b=1, c=None, d=98)], summary.collect())
501
+
502
def test_approxQuantile(self):
    """Check the result shape of ``df.stat.approxQuantile`` and its argument validation.

    A single column name must yield a list of three floats. A list or tuple of
    two column names must yield two such lists. Non-string column arguments must
    raise ``TypeError``.
    """
    df = self.spark.createDataFrame([Row(a=i, b=i + 10) for i in range(10)])

    # The original loop iterated over ["a", "a"] and so checked the same column
    # twice; exercise each column once instead.
    for col_name in ("a", "b"):
        aq = df.stat.approxQuantile(col_name, [0.1, 0.5, 0.9], 0.1)
        self.assertIsInstance(aq, list)
        self.assertEqual(len(aq), 3)
        self.assertTrue(all(isinstance(q, float) for q in aq))

    # Multi-column form: one inner list of quantiles per requested column.
    for col_names in (["a", "b"], ("a", "b")):
        per_column = df.stat.approxQuantile(col_names, [0.1, 0.5, 0.9], 0.1)
        self.assertIsInstance(per_column, list)
        self.assertEqual(len(per_column), 2)
        for quantiles in per_column:
            self.assertIsInstance(quantiles, list)
            self.assertEqual(len(quantiles), 3)
            self.assertTrue(all(isinstance(q, float) for q in quantiles))

    # Anything that is not a string (or a sequence of strings) is rejected.
    for bad_cols in (123, ("a", 123), ["a", 123]):
        with self.assertRaises(TypeError):
            df.stat.approxQuantile(bad_cols, [0.1, 0.9], 0.1)
530
+
531
def test_sorting_functions_with_column(self):
    """Run the shared sorting-function checks, expecting results of type ``Column``."""
    expected_type = Column
    self.check_sorting_functions_with_column(expected_type)
533
+
534
def check_sorting_functions_with_column(self, tpe):
    """Check that the sort-order helpers return ``tpe`` and describe the ordering.

    Every helper is called with both a Column and a column name; the string form
    of the result must contain the expected ordering text.
    """
    exprs = [F.col("x"), "x"]
    null_aware = [F.asc_nulls_first, F.asc_nulls_last, F.desc_nulls_first, F.desc_nulls_last]

    for fun in null_aware:
        for _expr in exprs:
            res = fun(_expr)
            self.assertIsInstance(res, tpe)
            # The expected text is derived from the function name,
            # e.g. asc_nulls_first -> 'x ASC NULLS FIRST'.
            ordering_text = f"""'x {fun.__name__.replace("_", " ").upper()}'"""
            self.assertIn(ordering_text, str(res))

    # Plain asc/desc are expected to render with explicit null ordering as well.
    plain_helpers = (
        (F.asc, """'x ASC NULLS FIRST'"""),
        (F.desc, """'x DESC NULLS LAST'"""),
    )
    for fun, ordering_text in plain_helpers:
        for _expr in exprs:
            res = fun(_expr)
            self.assertIsInstance(res, tpe)
            self.assertIn(ordering_text, str(res))
553
+
554
def test_sort_with_nulls_order(self):
    """Sort a nullable column with each nulls-first/last helper and compare the row order."""
    people = [("Tom", 80), (None, 60), ("Alice", 50)]
    df = self.spark.createDataFrame(people, ["name", "height"])

    scenarios = [
        (F.asc_nulls_first, [Row(name=None), Row(name="Alice"), Row(name="Tom")]),
        (F.asc_nulls_last, [Row(name="Alice"), Row(name="Tom"), Row(name=None)]),
        (F.desc_nulls_first, [Row(name=None), Row(name="Tom"), Row(name="Alice")]),
        (F.desc_nulls_last, [Row(name="Tom"), Row(name="Alice"), Row(name=None)]),
    ]
    for ordering, wanted in scenarios:
        ordered = df.select(df.name).orderBy(ordering("name")).collect()
        self.assertEqual(ordered, wanted)
574
+
575
def test_input_file_name_reset_for_rdd(self):
    """Check that ``F.input_file_name`` does not leak a file name from a previous job.

    A file-backed DataFrame is collected first. A range-based DataFrame queried
    afterwards must then report an empty file name for each of its 100 rows.
    """
    rdd = self.sc.textFile("python/test_support/hello/hello.txt").map(lambda x: {"data": x})
    df = self.spark.createDataFrame(rdd, "data STRING")
    df.select(F.input_file_name().alias("file")).collect()

    non_file_df = self.spark.range(100).select(F.input_file_name())

    results = non_file_df.collect()
    # assertEqual reports the actual row count on failure, unlike
    # assertTrue(len(results) == 100).
    self.assertEqual(len(results), 100)

    # [SPARK-24605]: if everything was properly reset after the last job, this should return
    # empty string rather than the file read in the last job.
    for result in results:
        self.assertEqual(result[0], "")
589
+
590
def test_slice(self):
    """Check ``F.slice`` with literal ints, literal Columns, column names and expressions."""
    records = [
        ([1, 2, 3], 2, 2),
        ([4, 5], 2, 2),
    ]
    df = self.spark.createDataFrame(records, ["x", "index", "len"])

    expected = [Row(sliced=[2, 3]), Row(sliced=[5])]

    with_ints = df.select(F.slice(df.x, 2, 2).alias("sliced")).collect()
    self.assertEqual(with_ints, expected)

    with_literals = df.select(F.slice(df.x, F.lit(2), F.lit(2)).alias("sliced")).collect()
    self.assertEqual(with_literals, expected)

    with_names = df.select(F.slice("x", "index", "len").alias("sliced")).collect()
    self.assertEqual(with_names, expected)

    # Start position computed from the array size.
    computed_start = df.select(F.slice(df.x, F.size(df.x) - 1, F.lit(1)).alias("sliced")).collect()
    self.assertEqual(computed_start, [Row(sliced=[2]), Row(sliced=[4])])

    # Length computed from the array size.
    computed_length = df.select(
        F.slice(df.x, F.lit(1), F.size(df.x) - 1).alias("sliced")
    ).collect()
    self.assertEqual(computed_length, [Row(sliced=[1, 2]), Row(sliced=[4])])
624
+
625
def test_array_repeat(self):
    """``F.array_repeat`` accepts the count as an int, a literal Column or a column name."""
    df = self.spark.range(1).withColumn("repeat_n", F.lit(3))
    expected = [Row(val=[0, 0, 0])]

    for count in (3, F.lit(3), "repeat_n"):
        repeated = df.select(F.array_repeat("id", count).alias("val")).collect()
        self.assertEqual(repeated, expected)
635
+
636
def test_input_file_name_udf(self):
    """Check that ``F.input_file_name`` still reports the source file next to a Python UDF."""
    df = self.spark.read.text("python/test_support/hello/hello.txt")
    df = df.select(F.udf(lambda x: x)("value"), F.input_file_name().alias("file"))
    file_name = df.collect()[0].file
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("python/test_support/hello/hello.txt", file_name)
641
+
642
def test_least(self):
    """Check ``F.least`` over Columns, literals and names, and its minimum-arity error."""
    df = self.spark.createDataFrame([(1, 4, 3)], ["a", "b", "c"])
    expected = [Row(least=1)]

    from_columns = df.select(F.least(df.a, df.b, df.c).alias("least")).collect()
    self.assertEqual(from_columns, expected)

    from_literals = df.select(F.least(F.lit(3), F.lit(5), F.lit(1)).alias("least")).collect()
    self.assertEqual(from_literals, expected)

    from_names = df.select(F.least("a", "b", "c").alias("least")).collect()
    self.assertEqual(from_names, expected)

    # A single column is not enough.
    with self.assertRaises(PySparkValueError) as pe:
        df.select(F.least(df.a).alias("least")).collect()

    self.check_error(
        exception=pe.exception,
        error_class="WRONG_NUM_COLUMNS",
        message_parameters={"func_name": "least", "num_cols": "2"},
    )
660
+
661
def test_overlay(self):
    """Check the string form, the evaluated result and the type validation of ``F.overlay``."""
    columns = [
        F.overlay(F.col("foo"), F.col("bar"), 1),
        F.overlay("x", "y", 3),
        F.overlay(F.col("x"), F.col("y"), 1, 3),
        F.overlay("x", "y", 2, 5),
        F.overlay("x", "y", F.lit(11)),
        F.overlay("x", "y", F.lit(2), F.lit(5)),
    ]
    # Pull the "overlay(...)" fragment out of each column's string representation.
    matches_per_column = [re.findall("(overlay\\(.*\\))", str(x)) for x in columns]
    actual = list(chain.from_iterable(matches_per_column))

    expected = [
        "overlay(foo, bar, 1, -1)",
        "overlay(x, y, 3, -1)",
        "overlay(x, y, 1, 3)",
        "overlay(x, y, 2, 5)",
        "overlay(x, y, 11, -1)",
        "overlay(x, y, 2, 5)",
    ]

    self.assertListEqual(actual, expected)

    df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len"))
    exp = [Row(ol="SPARK_CORESQL")]

    with_ints = df.select(F.overlay(df.x, df.y, 7, 0).alias("ol")).collect()
    self.assertEqual(with_ints, exp)

    with_literals = df.select(F.overlay(df.x, df.y, F.lit(7), F.lit(0)).alias("ol")).collect()
    self.assertEqual(with_literals, exp)

    with_names = df.select(F.overlay("x", "y", "pos", "len").alias("ol")).collect()
    self.assertEqual(with_names, exp)

    # A float position is rejected.
    with self.assertRaises(PySparkTypeError) as pe:
        df.select(F.overlay(df.x, df.y, 7.5, 0).alias("ol")).collect()

    self.check_error(
        exception=pe.exception,
        error_class="NOT_COLUMN_OR_INT_OR_STR",
        message_parameters={"arg_name": "pos", "arg_type": "float"},
    )

    # A float length is rejected.
    with self.assertRaises(PySparkTypeError) as pe:
        df.select(F.overlay(df.x, df.y, 7, 0.5).alias("ol")).collect()

    self.check_error(
        exception=pe.exception,
        error_class="NOT_COLUMN_OR_INT_OR_STR",
        message_parameters={"arg_name": "len", "arg_type": "float"},
    )
715
+
716
def test_percentile(self):
    """Check the string representation of ``F.percentile`` for several argument forms."""
    columns = [
        F.percentile(F.col("foo"), F.lit(0.5)),
        F.percentile(F.col("bar"), 0.25, 2),
        F.percentile(F.col("bar"), [0.25, 0.5, 0.75]),
        F.percentile(F.col("foo"), (0.05, 0.95), 100),
        F.percentile("foo", 0.5),
        F.percentile("bar", [0.1, 0.9], F.lit(10)),
    ]
    # Pull the "percentile(...)" fragment out of each column's string representation.
    matches_per_column = [re.findall("(percentile\\(.*\\))", str(x)) for x in columns]
    actual = list(chain.from_iterable(matches_per_column))

    expected = [
        "percentile(foo, 0.5, 1)",
        "percentile(bar, 0.25, 2)",
        "percentile(bar, array(0.25, 0.5, 0.75), 1)",
        "percentile(foo, array(0.05, 0.95), 100)",
        "percentile(foo, 0.5, 1)",
        "percentile(bar, array(0.1, 0.9), 10)",
    ]

    self.assertListEqual(actual, expected)
743
+
744
def test_median(self):
    """Check the string representation of ``F.median``."""
    columns = [
        F.median(F.col("foo")),
    ]
    # Pull the "median(...)" fragment out of each column's string representation.
    matches_per_column = [re.findall("(median\\(.*\\))", str(x)) for x in columns]
    actual = list(chain.from_iterable(matches_per_column))

    expected = [
        "median(foo)",
    ]

    self.assertListEqual(actual, expected)
761
+
762
def test_percentile_approx(self):
    """Check the string representation of ``F.percentile_approx`` for several argument forms."""
    columns = [
        F.percentile_approx(F.col("foo"), F.lit(0.5)),
        F.percentile_approx(F.col("bar"), 0.25, 42),
        F.percentile_approx(F.col("bar"), [0.25, 0.5, 0.75]),
        F.percentile_approx(F.col("foo"), (0.05, 0.95), 100),
        F.percentile_approx("foo", 0.5),
        F.percentile_approx("bar", [0.1, 0.9], F.lit(10)),
    ]
    # Pull the "percentile_approx(...)" fragment out of each column's string representation.
    matches_per_column = [re.findall("(percentile_approx\\(.*\\))", str(x)) for x in columns]
    actual = list(chain.from_iterable(matches_per_column))

    expected = [
        "percentile_approx(foo, 0.5, 10000)",
        "percentile_approx(bar, 0.25, 42)",
        "percentile_approx(bar, array(0.25, 0.5, 0.75), 10000)",
        "percentile_approx(foo, array(0.05, 0.95), 100)",
        "percentile_approx(foo, 0.5, 10000)",
        "percentile_approx(bar, array(0.1, 0.9), 10)",
    ]

    self.assertListEqual(actual, expected)
789
+
790
def test_nth_value(self):
    """Check ``F.nth_value`` over an ordered window, with and without ignoring nulls."""
    df = self.spark.createDataFrame(
        [
            ("a", 0, None),
            ("a", 1, "x"),
            ("a", 2, "y"),
            ("a", 3, "z"),
            ("a", 4, None),
            ("b", 1, None),
            ("b", 2, None),
        ],
        schema=("key", "order", "value"),
    )
    w = Window.partitionBy("key").orderBy("order")

    rs = df.select(
        df.key,
        df.order,
        F.nth_value("value", 2).over(w),
        F.nth_value("value", 2, False).over(w),
        F.nth_value("value", 2, True).over(w),
    ).collect()

    expected = [
        ("a", 0, None, None, None),
        ("a", 1, "x", "x", None),
        ("a", 2, "x", "x", "y"),
        ("a", 3, "x", "x", "y"),
        ("a", 4, "x", "x", "y"),
        ("b", 1, None, None, None),
        ("b", 2, None, None, None),
    ]

    # zip() stops at the shorter input, so without this check an empty or
    # truncated result would pass the loop below unnoticed.
    self.assertEqual(len(rs), len(expected))
    for r, ex in zip(sorted(rs), sorted(expected)):
        self.assertEqual(tuple(r), ex[: len(r)])
825
+
826
def test_higher_order_function_failures(self):
    """Check that ``F.transform`` rejects unsupported lambda signatures and return types."""
    failure_cases = [
        # Should fail with varargs
        (
            lambda *x: F.lit(1),
            "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION",
            {"func_name": "<lambda>"},
        ),
        # Should fail with kwargs
        (
            lambda **x: F.lit(1),
            "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION",
            {"func_name": "<lambda>"},
        ),
        # Should fail with nullary function
        (
            lambda: F.lit(1),
            "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION",
            {"func_name": "<lambda>", "num_args": "0"},
        ),
        # Should fail with quaternary function
        (
            lambda x1, x2, x3, x4: F.lit(1),
            "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION",
            {"func_name": "<lambda>", "num_args": "4"},
        ),
        # Should fail if function doesn't return Column
        (
            lambda x: 1,
            "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN",
            {"func_name": "<lambda>", "return_type": "int"},
        ),
    ]

    for bad_function, error_class, message_parameters in failure_cases:
        with self.assertRaises(PySparkValueError) as pe:
            F.transform(F.col("foo"), bad_function)

        self.check_error(
            exception=pe.exception,
            error_class=error_class,
            message_parameters=message_parameters,
        )
876
+
877
def test_nested_higher_order_function(self):
    """Check that lambda variables resolve correctly when ``F.transform`` calls are nested."""
    # SPARK-35382: lambda vars must be resolved properly in nested higher order functions
    df = self.spark.sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters")

    actual = df.select(
        F.flatten(
            F.transform(
                "numbers",
                lambda number: F.transform(
                    "letters", lambda letter: F.struct(number.alias("n"), letter.alias("l"))
                ),
            )
        )
    ).first()[0]

    expected = [
        (1, "a"),
        (1, "b"),
        (1, "c"),
        (2, "a"),
        (2, "b"),
        (2, "c"),
        (3, "a"),
        (3, "b"),
        (3, "c"),
    ]

    # assertEquals is a deprecated alias that was removed in Python 3.12;
    # assertEqual is the supported spelling.
    self.assertEqual(actual, expected)
905
+
906
def test_window_functions(self):
    """Check aggregate and ranking window functions over a partitioned, ordered window."""
    df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
    w = Window.partitionBy("value").orderBy("key")

    sel = df.select(
        df.value,
        df.key,
        F.max("key").over(w.rowsBetween(0, 1)),
        F.min("key").over(w.rowsBetween(0, 1)),
        F.count("key").over(w.rowsBetween(float("-inf"), float("inf"))),
        F.row_number().over(w),
        F.rank().over(w),
        F.dense_rank().over(w),
        F.ntile(2).over(w),
    )
    rs = sorted(sel.collect())
    expected = [
        ("1", 1, 1, 1, 1, 1, 1, 1, 1),
        ("2", 1, 1, 1, 3, 1, 1, 1, 1),
        ("2", 1, 2, 1, 3, 2, 1, 1, 1),
        ("2", 2, 2, 2, 3, 3, 3, 2, 2),
    ]
    # zip() stops at the shorter input, so without this check an empty or
    # truncated result would pass the loop below unnoticed.
    self.assertEqual(len(rs), len(expected))
    for r, ex in zip(rs, expected):
        self.assertEqual(tuple(r), ex[: len(r)])
930
+
931
def test_window_functions_without_partitionBy(self):
    """Check aggregate and ranking window functions over an ordered, unpartitioned window."""
    df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
    w = Window.orderBy("key", df.value)

    sel = df.select(
        df.value,
        df.key,
        F.max("key").over(w.rowsBetween(0, 1)),
        F.min("key").over(w.rowsBetween(0, 1)),
        F.count("key").over(w.rowsBetween(float("-inf"), float("inf"))),
        F.row_number().over(w),
        F.rank().over(w),
        F.dense_rank().over(w),
        F.ntile(2).over(w),
    )
    rs = sorted(sel.collect())
    expected = [
        ("1", 1, 1, 1, 4, 1, 1, 1, 1),
        ("2", 1, 1, 1, 4, 2, 2, 2, 1),
        ("2", 1, 2, 1, 4, 3, 2, 2, 2),
        ("2", 2, 2, 2, 4, 4, 4, 3, 2),
    ]
    # zip() stops at the shorter input, so without this check an empty or
    # truncated result would pass the loop below unnoticed.
    self.assertEqual(len(rs), len(expected))
    for r, ex in zip(rs, expected):
        self.assertEqual(tuple(r), ex[: len(r)])
955
+
956
def test_window_functions_cumulative_sum(self):
    """Check ``F.sum`` over row frames, including bounds beyond the JVM Long range."""
    df = self.spark.createDataFrame([("one", 1), ("two", 2)], ["key", "value"])

    def assert_frame_sums(frame, expected):
        """Sum ``value`` over ``frame`` and compare the sorted rows with ``expected``."""
        sel = df.select(df.key, F.sum(df.value).over(frame))
        rs = sorted(sel.collect())
        # zip() stops at the shorter input, so without this check an empty or
        # truncated result would pass the loop below unnoticed.
        self.assertEqual(len(rs), len(expected))
        for r, ex in zip(rs, expected):
            self.assertEqual(tuple(r), ex[: len(r)])

    # Test cumulative sum
    assert_frame_sums(
        Window.rowsBetween(Window.unboundedPreceding, 0),
        [("one", 1), ("two", 3)],
    )

    # Test boundary values less than JVM's Long.MinValue and make sure we don't overflow
    assert_frame_sums(
        Window.rowsBetween(Window.unboundedPreceding - 1, 0),
        [("one", 1), ("two", 3)],
    )

    # Test boundary values greater than JVM's Long.MaxValue and make sure we don't overflow
    frame_end = Window.unboundedFollowing + 1
    assert_frame_sums(
        Window.rowsBetween(Window.currentRow, frame_end),
        [("one", 3), ("two", 2)],
    )
986
+
987
def test_window_time(self):
    """Check ``F.window_time`` against the end of a 5-second time window."""
    event = (datetime.datetime(2016, 3, 11, 9, 0, 7), 1)
    df = self.spark.createDataFrame([event], ["date", "val"])

    windowed = df.groupBy(F.window("date", "5 seconds")).agg(F.sum("val").alias("sum"))
    projected = windowed.select(
        windowed.window.end.cast("string").alias("end"),
        F.window_time(windowed.window).cast("string").alias("window_time"),
        "sum",
    )
    rows = projected.collect()

    wanted = Row(end="2016-03-11 09:00:10", window_time="2016-03-11 09:00:09.999999", sum=1)
    self.assertEqual(rows[0], wanted)
1001
+
1002
def test_collect_functions(self):
    """Check ``F.collect_set`` and ``F.collect_list`` on an int and a string column."""
    df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])

    def collected(aggregate):
        """Return the sorted contents of the single aggregated array."""
        return sorted(df.select(aggregate.alias("r")).collect()[0].r)

    self.assertEqual(collected(F.collect_set(df.key)), [1, 2])
    self.assertEqual(collected(F.collect_list(df.key)), [1, 1, 1, 2])
    self.assertEqual(collected(F.collect_set(df.value)), ["1", "2"])
    self.assertEqual(collected(F.collect_list(df.value)), ["1", "2", "2", "2"])
1016
+
1017
def test_datetime_functions(self):
    """``F.to_date`` parses a date string; the result column is named ``to_date(dateCol)``."""
    df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol")
    parsed_row = df.select(F.to_date(F.col("dateCol"))).first()
    wanted = datetime.date(2017, 1, 22)
    self.assertEqual(wanted, parsed_row["to_date(dateCol)"])
1021
+
1022
def test_assert_true(self):
    """Run the shared ``assert_true`` checks, expecting ``Py4JJavaError`` on failure."""
    expected_error = Py4JJavaError
    self.check_assert_true(expected_error)
1024
+
1025
def check_assert_true(self, tpe):
    """Exercise ``F.assert_true``; ``tpe`` is the exception type expected on a failed assertion."""
    df = self.spark.range(3)

    # Every id is below 3, so the assertion holds and each row yields None.
    passing = df.select(F.assert_true(df.id < 3)).toDF("val").collect()
    self.assertEqual(passing, [Row(val=None), Row(val=None), Row(val=None)])

    # String error message.
    with self.assertRaisesRegex(tpe, "too big"):
        df.select(F.assert_true(df.id < 2, "too big")).toDF("val").collect()

    # Column error message.
    with self.assertRaisesRegex(tpe, "2000000"):
        df.select(F.assert_true(df.id < 2, df.id * 1e6)).toDF("val").collect()

    # An int is not an accepted error message type.
    with self.assertRaises(PySparkTypeError) as pe:
        df.select(F.assert_true(df.id < 2, 5))

    self.check_error(
        exception=pe.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters={"arg_name": "errMsg", "arg_type": "int"},
    )
1047
+
1048
def test_raise_error(self):
    """Run the shared ``raise_error`` checks, expecting ``Py4JJavaError``."""
    expected_error = Py4JJavaError
    self.check_raise_error(expected_error)
1050
+
1051
def check_raise_error(self, tpe):
    """Exercise ``F.raise_error``; ``tpe`` is the exception type expected at execution time."""
    df = self.spark.createDataFrame([Row(id="foobar")])

    # Message taken from a column.
    from_column = df.select(F.raise_error(df.id))
    with self.assertRaisesRegex(tpe, "foobar"):
        from_column.collect()

    # Message given as a plain string.
    from_string = df.select(F.raise_error("barfoo"))
    with self.assertRaisesRegex(tpe, "barfoo"):
        from_string.collect()

    # None is not an accepted message type.
    with self.assertRaises(PySparkTypeError) as pe:
        df.select(F.raise_error(None))

    self.check_error(
        exception=pe.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters={"arg_name": "errMsg", "arg_type": "NoneType"},
    )
1068
+
1069
def test_sum_distinct(self):
    """``F.sum_distinct`` and ``F.sumDistinct`` must produce equal results."""
    snake_case = F.sum_distinct(F.col("id"))
    camel_case = F.sumDistinct(F.col("id"))
    self.spark.range(10).select(F.assert_true(snake_case == camel_case)).collect()
1073
+
1074
def test_shiftleft(self):
    """``F.shiftLeft`` and ``F.shiftleft`` must produce equal results."""
    camel_case = F.shiftLeft(F.col("id"), 2)
    lower_case = F.shiftleft(F.col("id"), 2)
    self.spark.range(10).select(F.assert_true(camel_case == lower_case)).collect()
1078
+
1079
def test_shiftright(self):
    """``F.shiftRight`` and ``F.shiftright`` must produce equal results."""
    camel_case = F.shiftRight(F.col("id"), 2)
    lower_case = F.shiftright(F.col("id"), 2)
    self.spark.range(10).select(F.assert_true(camel_case == lower_case)).collect()
1083
+
1084
def test_shiftrightunsigned(self):
    """``F.shiftRightUnsigned`` and ``F.shiftrightunsigned`` must produce equal results."""
    camel_case = F.shiftRightUnsigned(F.col("id"), 2)
    lower_case = F.shiftrightunsigned(F.col("id"), 2)
    self.spark.range(10).select(F.assert_true(camel_case == lower_case)).collect()
1090
+
1091
def test_lit_day_time_interval(self):
    """A ``datetime.timedelta`` passed through ``F.lit`` must come back unchanged."""
    td = datetime.timedelta(days=1, hours=12, milliseconds=123)
    first_row = self.spark.range(1).select(F.lit(td)).first()
    self.assertEqual(first_row[0], td)
1095
+
1096
def test_lit_list(self):
    """Check ``F.lit`` with flat, nested and mixed-type lists, and with lists of Columns."""

    def literal_value(value):
        """Round-trip ``value`` through ``F.lit`` and return what comes back."""
        return self.spark.range(1).select(F.lit(value)).first()[0]

    # SPARK-40271: added list type supporting
    self.assertEqual(literal_value([1, 2, 3]), [1, 2, 3])
    self.assertEqual(literal_value([[1, 2, 3], [3, 4]]), [[1, 2, 3], [3, 4]])

    # With ANSI mode disabled, mixed-type elements are expected to come back as strings.
    with self.sql_conf({"spark.sql.ansi.enabled": False}):
        self.assertEqual(literal_value(["a", 1, None, 1.0]), ["a", "1", None, "1.0"])
        self.assertEqual(
            literal_value([["a", 1, None, 1.0], [1, None, "b"]]),
            [["a", "1", None, "1.0"], ["1", None, "b"]],
        )

    # Columns inside a list are rejected.
    df = self.spark.range(10)
    with self.assertRaises(PySparkValueError) as pe:
        F.lit([df.id, df.id])

    self.check_error(
        exception=pe.exception,
        error_class="COLUMN_IN_LIST",
        message_parameters={"func_name": "lit"},
    )
1128
+
1129
+ # Test added for SPARK-39832; change Python API to accept both col & str as input
1130
def test_regexp_replace(self):
    """``F.regexp_replace`` accepts the pattern and replacement as strings or as Columns."""
    df = self.spark.createDataFrame(
        [("100-200", r"(\d+)", "--")], ["str", "pattern", "replacement"]
    )
    comparisons = [
        F.regexp_replace("str", r"(\d+)", "--") == "-----",
        F.regexp_replace("str", F.col("pattern"), F.col("replacement")) == "-----",
    ]
    outcome = df.select(*comparisons).first()
    self.assertTrue(all(outcome))
1142
+
1143
@unittest.skipIf(not have_numpy, "NumPy not installed")
def test_lit_np_scalar(self):
    """Check the Spark dtype that ``F.lit`` assigns to each NumPy scalar type."""
    import numpy as np

    # (NumPy scalar type, expected ``DataFrame.dtypes`` of the literal column)
    dtype_to_spark_dtypes = [
        (np.int8, [("1", "tinyint")]),
        (np.int16, [("1", "smallint")]),
        (np.int32, [("1", "int")]),
        (np.int64, [("1", "bigint")]),
        (np.float32, [("1.0", "float")]),
        (np.float64, [("1.0", "double")]),
        (np.bool_, [("true", "boolean")]),
    ]
    for dtype, spark_dtypes in dtype_to_spark_dtypes:
        with self.subTest(dtype):
            literal_frame = self.spark.range(1).select(F.lit(dtype(1)))
            self.assertEqual(literal_frame.dtypes, spark_dtypes)
1159
+
1160
@unittest.skipIf(not have_numpy, "NumPy not installed")
def test_np_scalar_input(self):
    """Check that array functions accept NumPy integer and float scalars as the value."""
    import numpy as np

    groups = [
        ([([1, 2, 3],), ([],)], [np.int8, np.int16, np.int32, np.int64]),
        ([([1.0, 2.0, 3.0],), ([],)], [np.float32, np.float64]),
    ]
    for rows, dtypes in groups:
        df = self.spark.createDataFrame(rows, ["data"])
        for dtype in dtypes:
            res = df.select(F.array_contains(df.data, dtype(1)).alias("b")).collect()
            self.assertEqual([Row(b=True), Row(b=False)], res)
            res = df.select(F.array_position(df.data, dtype(1)).alias("c")).collect()
            self.assertEqual([Row(c=1), Row(c=0)], res)
1177
+
1178
@unittest.skipIf(not have_numpy, "NumPy not installed")
def test_ndarray_input(self):
    """Check the array dtype ``F.lit`` assigns to NumPy arrays and the rejection of ``np.uint``."""
    import numpy as np

    # (NumPy dtype name, expected ``DataFrame.dtypes`` of the literal column "b")
    arr_dtype_to_spark_dtypes = [
        ("int8", [("b", "array<smallint>")]),
        ("int16", [("b", "array<smallint>")]),
        ("int32", [("b", "array<int>")]),
        ("int64", [("b", "array<bigint>")]),
        ("float32", [("b", "array<float>")]),
        ("float64", [("b", "array<double>")]),
    ]
    for t, expected_spark_dtypes in arr_dtype_to_spark_dtypes:
        arr = np.array([1, 2]).astype(t)
        literal_frame = self.spark.range(1).select(F.lit(arr).alias("b"))
        self.assertEqual(expected_spark_dtypes, literal_frame.dtypes)

    # Unsigned integer arrays are not supported.
    unsigned = np.array([1, 2]).astype(np.uint)
    with self.assertRaises(PySparkTypeError) as pe:
        self.spark.range(1).select(F.lit(unsigned).alias("b"))

    self.check_error(
        exception=pe.exception,
        error_class="UNSUPPORTED_NUMPY_ARRAY_SCALAR",
        message_parameters={
            "dtype": "uint64",
        },
    )
1206
+
1207
def test_binary_math_function(self):
    """Evaluate the two-argument math functions at (1.1, 8) and compare to 5 decimal places."""
    cases = [(F.atan2, 0.13664), (F.hypot, 8.07527), (F.pow, 2.14359), (F.pmod, 1.1)]
    funcs = tuple(func for func, _ in cases)
    expected = tuple(value for _, value in cases)

    df = self.spark.range(1).select(*(func(1.1, 8) for func in funcs))
    for computed, wanted in zip(df.first(), expected):
        self.assertAlmostEqual(computed, wanted, 5)
1214
+
1215
def test_map_functions(self):
    """Check the map-related functions by chaining them over one small map."""
    # SPARK-38496: Check basic functionality of all "map" type related functions
    expected = {"a": 1, "b": 2}
    expected2 = {"c": 3, "d": 4}
    df = self.spark.createDataFrame(
        [(list(expected.keys()), list(expected.values()))], ["k", "v"]
    )

    # Step 1: build the two maps.
    with_maps = df.select(
        F.expr("map('c', 3, 'd', 4) as dict2"),
        F.map_from_arrays(df.k, df.v).alias("dict"),
        "*",
    )
    # Step 2: take the first map apart.
    with_parts = with_maps.select(
        F.map_contains_key("dict", "a").alias("one"),
        F.map_contains_key("dict", "d").alias("not_exists"),
        F.map_keys("dict").alias("keys"),
        F.map_values("dict").alias("values"),
        F.map_entries("dict").alias("items"),
        "*",
    )
    # Step 3: merge the maps and rebuild one from its keys and values.
    combined = with_parts.select(
        F.map_concat("dict", "dict2").alias("merged"),
        F.map_from_entries(F.arrays_zip("keys", "values")).alias("from_items"),
        "*",
    )
    actual = combined.first()

    self.assertEqual(expected, actual["dict"])
    self.assertTrue(actual["one"])
    self.assertFalse(actual["not_exists"])
    self.assertEqual(list(expected.keys()), actual["keys"])
    self.assertEqual(list(expected.values()), actual["values"])
    self.assertEqual(expected, dict(actual["items"]))
    self.assertEqual({**expected, **expected2}, dict(actual["merged"]))
    self.assertEqual(expected, actual["from_items"])
1251
+
1252
def test_schema_of_json(self):
    """``F.schema_of_json`` called with an int must raise ``PySparkTypeError``."""
    with self.assertRaises(PySparkTypeError) as raised:
        F.schema_of_json(1)

    expected_params = {"arg_name": "json", "arg_type": "int"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters=expected_params,
    )
1261
+
1262
def test_schema_of_csv(self):
    """``F.schema_of_csv`` called with an int must raise ``PySparkTypeError``."""
    with self.assertRaises(PySparkTypeError) as raised:
        F.schema_of_csv(1)

    expected_params = {"arg_name": "csv", "arg_type": "int"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters=expected_params,
    )
1271
+
1272
def test_from_csv(self):
    """``F.from_csv`` given an int schema must raise ``PySparkTypeError``."""
    frame = self.spark.range(10)
    with self.assertRaises(PySparkTypeError) as raised:
        F.from_csv(frame.id, 1)

    expected_params = {"arg_name": "schema", "arg_type": "int"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters=expected_params,
    )
1282
+
1283
def test_greatest(self):
    """``F.greatest`` given a single column must raise ``PySparkValueError``."""
    frame = self.spark.range(10)
    with self.assertRaises(PySparkValueError) as raised:
        F.greatest(frame.id)

    expected_params = {"func_name": "greatest", "num_cols": "2"}
    self.check_error(
        exception=raised.exception,
        error_class="WRONG_NUM_COLUMNS",
        message_parameters=expected_params,
    )
1293
+
1294
def test_when(self):
    """``F.when`` given a string condition must raise ``PySparkTypeError``."""
    with self.assertRaises(PySparkTypeError) as raised:
        F.when("id", 1)

    expected_params = {"arg_name": "condition", "arg_type": "str"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN",
        message_parameters=expected_params,
    )
1303
+
1304
def test_window(self):
    """``F.window`` given an int ``windowDuration`` must raise ``PySparkTypeError``."""
    with self.assertRaises(PySparkTypeError) as raised:
        F.window("date", 5)

    expected_params = {"arg_name": "windowDuration", "arg_type": "int"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_STR",
        message_parameters=expected_params,
    )
1313
+
1314
def test_session_window(self):
    """``F.session_window`` given an int ``gapDuration`` must raise ``PySparkTypeError``."""
    with self.assertRaises(PySparkTypeError) as raised:
        F.session_window("date", 5)

    expected_params = {"arg_name": "gapDuration", "arg_type": "int"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN_OR_STR",
        message_parameters=expected_params,
    )
1323
+
1324
def test_bucket(self):
    """``F.bucket`` given a string ``numBuckets`` must raise ``PySparkTypeError``."""
    with self.assertRaises(PySparkTypeError) as raised:
        F.bucket("5", "id")

    expected_params = {"arg_name": "numBuckets", "arg_type": "str"}
    self.check_error(
        exception=raised.exception,
        error_class="NOT_COLUMN_OR_INT",
        message_parameters=expected_params,
    )
1333
+
1334
+
1335
class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin):
    """Concrete test case combining ``ReusedSQLTestCase`` with ``FunctionsTestsMixin``."""
1337
+
1338
+
1339
if __name__ == "__main__":
    import unittest
    from pyspark.sql.tests.test_functions import *  # noqa: F401

    # Default to unittest's built-in runner; switch to the XML reporter only
    # when the optional ``xmlrunner`` package can be imported.
    testRunner = None
    try:
        import xmlrunner

        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
    except ImportError:
        # xmlrunner is optional: keep the default runner selected above.
        pass
    unittest.main(testRunner=testRunner, verbosity=2)