snowpark_connect-0.20.2-py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic.
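
For reference, a wheel like this one is an ordinary zip archive, so the file manifest below can be reproduced locally. A minimal Python sketch, assuming the wheel has been downloaded under its canonical PEP 427 filename (the local path is an assumption, not part of this diff):

    import zipfile

    # Assumed local filename for the release shown above (PEP 427 naming).
    WHEEL_PATH = "snowpark_connect-0.20.2-py3-none-any.whl"

    with zipfile.ZipFile(WHEEL_PATH) as whl:
        for info in whl.infolist():
            # Print each member path with its uncompressed size in bytes.
            print(f"{info.filename} ({info.file_size} bytes)")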

Files changed (879)
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py
@@ -0,0 +1,3937 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from pyspark.sql.connect.utils import check_dependencies
+
+check_dependencies(__name__)
+
+import decimal
+import inspect
+import warnings
+import functools
+from typing import (
+    Any,
+    Dict,
+    TYPE_CHECKING,
+    Union,
+    List,
+    overload,
+    Optional,
+    Tuple,
+    Type,
+    Callable,
+    ValuesView,
+    cast,
+)
+
+import numpy as np
+
+from pyspark.errors import PySparkTypeError, PySparkValueError
+from pyspark.sql.connect.column import Column
+from pyspark.sql.connect.expressions import (
+    CaseWhen,
+    Expression,
+    LiteralExpression,
+    ColumnReference,
+    UnresolvedFunction,
+    UnresolvedStar,
+    SQLExpression,
+    LambdaFunction,
+    UnresolvedNamedLambdaVariable,
+    CallFunction,
+)
+from pyspark.sql.connect.udf import _create_py_udf
+from pyspark.sql.connect.udtf import _create_py_udtf
+from pyspark.sql import functions as pysparkfuncs
+from pyspark.sql.types import _from_numpy_type, DataType, StructType, ArrayType, StringType
+
+# The implementation of pandas_udf is embedded in pyspark.sql.functions.pandas_udf
+# for code reuse.
+from pyspark.sql.functions import pandas_udf  # noqa: F401
+
+
+if TYPE_CHECKING:
+    from pyspark.sql.connect._typing import (
+        ColumnOrName,
+        DataTypeOrString,
+        UserDefinedFunctionLike,
+    )
+    from pyspark.sql.connect.dataframe import DataFrame
+    from pyspark.sql.connect.udtf import UserDefinedTableFunction
+
+
+def _to_col_with_plan_id(col: str, plan_id: Optional[int]) -> Column:
+    if col == "*":
+        return Column(UnresolvedStar(unparsed_target=None))
+    elif col.endswith(".*"):
+        return Column(UnresolvedStar(unparsed_target=col))
+    else:
+        return Column(ColumnReference(unparsed_identifier=col, plan_id=plan_id))
+
+
+def _to_col(col: "ColumnOrName") -> Column:
+    assert isinstance(col, (Column, str))
+    return col if isinstance(col, Column) else column(col)
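For orientation, the two helpers above are what give the Connect client its name-resolution behavior at the public API surface: a bare "*", a qualified "alias.*", and a plain column name each map to a different expression node. A minimal sketch, assuming an active session `spark` (the DataFrame and names below are illustrative, not part of this file):

    from pyspark.sql import functions as F

    df = spark.range(3).withColumnRenamed("id", "n")
    df.select(F.col("*"))               # "*" becomes an UnresolvedStar with no target
    df.alias("d").select(F.col("d.*"))  # "alias.*" keeps the qualifier as the star's target
    df.select(F.col("n"))               # a plain name becomes a ColumnReference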
+
+
+def _invoke_function(name: str, *args: Union[Column, Expression]) -> Column:
+    """
+    Simple wrapper function that converts the arguments into the appropriate types.
+    Parameters
+    ----------
+    name Name of the function to be called.
+    args The list of arguments.
+
+    Returns
+    -------
+    :class:`Column`
+    """
+    expressions: List[Expression] = []
+    for arg in args:
+        assert isinstance(arg, (Column, Expression))
+        if isinstance(arg, Column):
+            expressions.append(arg._expr)
+        else:
+            expressions.append(arg)
+    return Column(UnresolvedFunction(name, expressions))
+
+
+def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column:
+    """
+    Invokes n-ary function identified by name
+    and wraps the result with :class:`~pyspark.sql.Column`.
+    """
+    _cols = [_to_col(c) for c in cols]
+    return _invoke_function(name, *_cols)
+
+
+def _invoke_binary_math_function(name: str, col1: Any, col2: Any) -> Column:
+    """
+    Invokes binary math function identified by name
+    and wraps the result with :class:`~pyspark.sql.Column`.
+    """
+
+    # For legacy reasons, the arguments here can be implicitly converted into column
+    _cols = [_to_col(c) if isinstance(c, (str, Column)) else lit(c) for c in (col1, col2)]
+    return _invoke_function(name, *_cols)
+
+
+def _get_lambda_parameters(f: Callable) -> ValuesView[inspect.Parameter]:
+    signature = inspect.signature(f)
+    parameters = signature.parameters.values()
+
+    # We should exclude functions that use variable args and keyword arguments,
+    # as well as keyword-only args.
+    supported_parameter_types = {
+        inspect.Parameter.POSITIONAL_OR_KEYWORD,
+        inspect.Parameter.POSITIONAL_ONLY,
+    }
+
+    # Validate that the function arity is between 1 and 3.
+    if not (1 <= len(parameters) <= 3):
+        raise PySparkValueError(
+            error_class="WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION",
+            message_parameters={"func_name": f.__name__, "num_args": str(len(parameters))},
+        )
+
+    # Verify that all arguments can be used as positional arguments.
+    if not all(p.kind in supported_parameter_types for p in parameters):
+        raise PySparkValueError(
+            error_class="UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION",
+            message_parameters={"func_name": f.__name__},
+        )
+
+    return parameters
+
+
+def _create_lambda(f: Callable) -> LambdaFunction:
+    """
+    Create ``o.a.s.sql.expressions.LambdaFunction`` corresponding
+    to the transformation described by f
+
+    :param f: A Python function of one of the following forms:
+            - (Column) -> Column: ...
+            - (Column, Column) -> Column: ...
+            - (Column, Column, Column) -> Column: ...
+    """
+    parameters = _get_lambda_parameters(f)
+
+    arg_names = ["x", "y", "z"][: len(parameters)]
+    arg_exprs = [
+        UnresolvedNamedLambdaVariable([UnresolvedNamedLambdaVariable.fresh_var_name(arg_name)])
+        for arg_name in arg_names
+    ]
+    arg_cols = [Column(arg_expr) for arg_expr in arg_exprs]
+
+    result = f(*arg_cols)
+
+    if not isinstance(result, Column):
+        raise PySparkValueError(
+            error_class="HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN",
+            message_parameters={"func_name": f.__name__, "return_type": type(result).__name__},
+        )
+
+    return LambdaFunction(result._expr, arg_exprs)
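Taken together, the two functions above enforce the contract for higher-order-function lambdas: one to three positional parameters, and a Column result. A quick sketch of what passes and what raises (illustrative only):

    _create_lambda(lambda x: x + 1)      # ok: unary, Column in, Column out
    _create_lambda(lambda x, y: x + y)   # ok: binary
    _create_lambda(lambda *xs: xs[0])    # raises: var-args are not positional parameters
    _create_lambda(lambda x: 1)          # raises: the result must be a Column, not an int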
+
+
+def _invoke_higher_order_function(
+    name: str,
+    cols: List["ColumnOrName"],
+    funs: List[Callable],
+) -> Column:
+    """
+    Invokes expression identified by name
+    (relative to ``org.apache.spark.sql.catalyst.expressions``)
+    and wraps the result with Column (first Scala one, then Python).
+
+    :param name: Name of the expression
+    :param cols: a list of columns
+    :param funs: a list of (*Column) -> Column functions.
+
+    :return: a Column
+    """
+    _cols = [_to_col(c) for c in cols]
+    _funs = [_create_lambda(f) for f in funs]
+
+    return _invoke_function(name, *_cols, *_funs)
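This helper is the common backend for the public higher-order functions defined later in the file; a hedged usage sketch, assuming a DataFrame `df` with array columns `xs` and `ys`:

    from pyspark.sql import functions as F

    df.select(F.transform("xs", lambda x: x * 2))          # one column, one unary lambda
    df.select(F.zip_with("xs", "ys", lambda a, b: a + b))  # two columns, one binary lambda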
+
+
+def _options_to_col(options: Dict[str, Any]) -> Column:
+    _options: List[Column] = []
+    for k, v in options.items():
+        _options.append(lit(str(k)))
+        _options.append(lit(str(v)))
+    return create_map(*_options)
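Note that `_options_to_col` stringifies both keys and values before building the map, so non-string options lose their original type. A small illustrative equivalence:

    _options_to_col({"header": True, "sep": ","})
    # builds the same expression as:
    # create_map(lit("header"), lit("True"), lit("sep"), lit(","))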
+
+
+# Normal Functions
+
+
+def col(col: str) -> Column:
+    return _to_col_with_plan_id(col=col, plan_id=None)
+
+
+col.__doc__ = pysparkfuncs.col.__doc__
+
+
+column = col
+
+
+def lit(col: Any) -> Column:
+    if isinstance(col, Column):
+        return col
+    elif isinstance(col, list):
+        if any(isinstance(c, Column) for c in col):
+            raise PySparkValueError(
+                error_class="COLUMN_IN_LIST", message_parameters={"func_name": "lit"}
+            )
+        return array(*[lit(c) for c in col])
+    elif isinstance(col, np.ndarray) and col.ndim == 1:
+        if _from_numpy_type(col.dtype) is None:
+            raise PySparkTypeError(
+                error_class="UNSUPPORTED_NUMPY_ARRAY_SCALAR",
+                message_parameters={"dtype": col.dtype.name},
+            )
+
+        # NumpyArrayConverter for Py4J cannot support ndarray with int8 values.
+        # This is actually not a problem for Connect, but we still convert it
+        # to int16 for compatibility.
+        if col.dtype == np.int8:
+            col = col.astype(np.int16)
+
+        return array(*[lit(c) for c in col])
+    else:
+        return Column(LiteralExpression._from_value(col))
+
+
+lit.__doc__ = pysparkfuncs.lit.__doc__
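A short sketch of the branches in `lit` above (`np` is numpy, as imported at the top of the file):

    lit(1)                                 # plain scalar -> LiteralExpression
    lit([1, 2, 3])                         # list -> array(lit(1), lit(2), lit(3))
    lit(np.array([1, 2], dtype=np.int8))   # int8 ndarray is widened to int16 first
    lit([lit(1), col("a")])                # raises COLUMN_IN_LIST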
+
+
+def bitwiseNOT(col: "ColumnOrName") -> Column:
+    warnings.warn("Deprecated in 3.4, use bitwise_not instead.", FutureWarning)
+    return bitwise_not(col)
+
+
+bitwiseNOT.__doc__ = pysparkfuncs.bitwiseNOT.__doc__
+
+
+def bitwise_not(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("~", col)
+
+
+bitwise_not.__doc__ = pysparkfuncs.bitwise_not.__doc__
+
+
+def bit_count(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_count", col)
+
+
+bit_count.__doc__ = pysparkfuncs.bit_count.__doc__
+
+
+def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bit_get", col, pos)
+
+
+bit_get.__doc__ = pysparkfuncs.bit_get.__doc__
+
+
+def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("getbit", col, pos)
+
+
+getbit.__doc__ = pysparkfuncs.getbit.__doc__
+
+
+def broadcast(df: "DataFrame") -> "DataFrame":
+    from pyspark.sql.connect.dataframe import DataFrame
+
+    if not isinstance(df, DataFrame):
+        raise PySparkTypeError(
+            error_class="NOT_DATAFRAME",
+            message_parameters={"arg_name": "df", "arg_type": type(df).__name__},
+        )
+    return df.hint("broadcast")
+
+
+broadcast.__doc__ = pysparkfuncs.broadcast.__doc__
+
+
+def coalesce(*cols: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("coalesce", *cols)
+
+
+coalesce.__doc__ = pysparkfuncs.coalesce.__doc__
+
+
+def expr(str: str) -> Column:
+    return Column(SQLExpression(str))
+
+
+expr.__doc__ = pysparkfuncs.expr.__doc__
+
+
+def greatest(*cols: "ColumnOrName") -> Column:
+    if len(cols) < 2:
+        raise PySparkValueError(
+            error_class="WRONG_NUM_COLUMNS",
+            message_parameters={"func_name": "greatest", "num_cols": "2"},
+        )
+    return _invoke_function_over_columns("greatest", *cols)
+
+
+greatest.__doc__ = pysparkfuncs.greatest.__doc__
+
+
+def input_file_name() -> Column:
+    return _invoke_function("input_file_name")
+
+
+input_file_name.__doc__ = pysparkfuncs.input_file_name.__doc__
+
+
+def least(*cols: "ColumnOrName") -> Column:
+    if len(cols) < 2:
+        raise PySparkValueError(
+            error_class="WRONG_NUM_COLUMNS",
+            message_parameters={"func_name": "least", "num_cols": "2"},
+        )
+    return _invoke_function_over_columns("least", *cols)
+
+
+least.__doc__ = pysparkfuncs.least.__doc__
+
+
+def isnan(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("isnan", col)
+
+
+isnan.__doc__ = pysparkfuncs.isnan.__doc__
+
+
+def isnull(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("isnull", col)
+
+
+isnull.__doc__ = pysparkfuncs.isnull.__doc__
+
+
+def monotonically_increasing_id() -> Column:
+    return _invoke_function("monotonically_increasing_id")
+
+
+monotonically_increasing_id.__doc__ = pysparkfuncs.monotonically_increasing_id.__doc__
+
+
+def nanvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("nanvl", col1, col2)
+
+
+nanvl.__doc__ = pysparkfuncs.nanvl.__doc__
+
+
+def rand(seed: Optional[int] = None) -> Column:
+    if seed is not None:
+        return _invoke_function("rand", lit(seed))
+    else:
+        return _invoke_function("rand")
+
+
+rand.__doc__ = pysparkfuncs.rand.__doc__
+
+
+def randn(seed: Optional[int] = None) -> Column:
+    if seed is not None:
+        return _invoke_function("randn", lit(seed))
+    else:
+        return _invoke_function("randn")
+
+
+randn.__doc__ = pysparkfuncs.randn.__doc__
+
+
+def spark_partition_id() -> Column:
+    return _invoke_function("spark_partition_id")
+
+
+spark_partition_id.__doc__ = pysparkfuncs.spark_partition_id.__doc__
+
+
+def when(condition: Column, value: Any) -> Column:
+    # Explicitly not using ColumnOrName type here to make reading condition less opaque
+    if not isinstance(condition, Column):
+        raise PySparkTypeError(
+            error_class="NOT_COLUMN",
+            message_parameters={"arg_name": "condition", "arg_type": type(condition).__name__},
+        )
+
+    value_col = value if isinstance(value, Column) else lit(value)
+
+    return Column(CaseWhen(branches=[(condition._expr, value_col._expr)], else_value=None))
+
+
+when.__doc__ = pysparkfuncs.when.__doc__
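Usage sketch for the CaseWhen construction above, assuming a DataFrame `df` with an integer column `age`:

    from pyspark.sql import functions as F

    df.select(F.when(df.age >= 18, "adult").otherwise("minor"))
    F.when(True, 1)   # raises NOT_COLUMN: the condition must be a Column, not a bool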
+
+
+# Sort Functions
+
+
+def asc(col: "ColumnOrName") -> Column:
+    return _to_col(col).asc()
+
+
+asc.__doc__ = pysparkfuncs.asc.__doc__
+
+
+def asc_nulls_first(col: "ColumnOrName") -> Column:
+    return _to_col(col).asc_nulls_first()
+
+
+asc_nulls_first.__doc__ = pysparkfuncs.asc_nulls_first.__doc__
+
+
+def asc_nulls_last(col: "ColumnOrName") -> Column:
+    return _to_col(col).asc_nulls_last()
+
+
+asc_nulls_last.__doc__ = pysparkfuncs.asc_nulls_last.__doc__
+
+
+def desc(col: "ColumnOrName") -> Column:
+    return _to_col(col).desc()
+
+
+desc.__doc__ = pysparkfuncs.desc.__doc__
+
+
+def desc_nulls_first(col: "ColumnOrName") -> Column:
+    return _to_col(col).desc_nulls_first()
+
+
+desc_nulls_first.__doc__ = pysparkfuncs.desc_nulls_first.__doc__
+
+
+def desc_nulls_last(col: "ColumnOrName") -> Column:
+    return _to_col(col).desc_nulls_last()
+
+
+desc_nulls_last.__doc__ = pysparkfuncs.desc_nulls_last.__doc__
+
+
+# Math Functions
+
+
+def abs(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("abs", col)
+
+
+abs.__doc__ = pysparkfuncs.abs.__doc__
+
+
+def acos(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("acos", col)
+
+
+acos.__doc__ = pysparkfuncs.acos.__doc__
+
+
+def acosh(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("acosh", col)
+
+
+acosh.__doc__ = pysparkfuncs.acosh.__doc__
+
+
+def asin(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("asin", col)
+
+
+asin.__doc__ = pysparkfuncs.asin.__doc__
+
+
+def asinh(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("asinh", col)
+
+
+asinh.__doc__ = pysparkfuncs.asinh.__doc__
+
+
+def atan(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("atan", col)
+
+
+atan.__doc__ = pysparkfuncs.atan.__doc__
+
+
+def atan2(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
+    return _invoke_binary_math_function("atan2", col1, col2)
+
+
+atan2.__doc__ = pysparkfuncs.atan2.__doc__
+
+
+def atanh(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("atanh", col)
+
+
+atanh.__doc__ = pysparkfuncs.atanh.__doc__
+
+
+def bin(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("bin", col)
+
+
+bin.__doc__ = pysparkfuncs.bin.__doc__
+
+
+def bround(col: "ColumnOrName", scale: int = 0) -> Column:
+    return _invoke_function("bround", _to_col(col), lit(scale))
+
+
+bround.__doc__ = pysparkfuncs.bround.__doc__
+
+
+def cbrt(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("cbrt", col)
+
+
+cbrt.__doc__ = pysparkfuncs.cbrt.__doc__
+
+
+def ceil(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("ceil", col)
+
+
+ceil.__doc__ = pysparkfuncs.ceil.__doc__
+
+
+def ceiling(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("ceiling", col)
+
+
+ceiling.__doc__ = pysparkfuncs.ceiling.__doc__
+
+
+def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column:
+    return _invoke_function("conv", _to_col(col), lit(fromBase), lit(toBase))
+
+
+conv.__doc__ = pysparkfuncs.conv.__doc__
+
+
+def cos(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("cos", col)
+
+
+cos.__doc__ = pysparkfuncs.cos.__doc__
+
+
+def cosh(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("cosh", col)
+
+
+cosh.__doc__ = pysparkfuncs.cosh.__doc__
+
+
+def cot(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("cot", col)
+
+
+cot.__doc__ = pysparkfuncs.cot.__doc__
+
+
+def csc(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("csc", col)
+
+
+csc.__doc__ = pysparkfuncs.csc.__doc__
+
+
+def degrees(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("degrees", col)
+
+
+degrees.__doc__ = pysparkfuncs.degrees.__doc__
+
+
+def e() -> Column:
+    return _invoke_function("e")
+
+
+e.__doc__ = pysparkfuncs.e.__doc__
+
+
+def exp(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("exp", col)
+
+
+exp.__doc__ = pysparkfuncs.exp.__doc__
+
+
+def expm1(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("expm1", col)
+
+
+expm1.__doc__ = pysparkfuncs.expm1.__doc__
+
+
+def factorial(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("factorial", col)
+
+
+factorial.__doc__ = pysparkfuncs.factorial.__doc__
+
+
+def floor(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("floor", col)
+
+
+floor.__doc__ = pysparkfuncs.floor.__doc__
+
+
+def hex(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("hex", col)
+
+
+hex.__doc__ = pysparkfuncs.hex.__doc__
+
+
+def hypot(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
+    return _invoke_binary_math_function("hypot", col1, col2)
+
+
+hypot.__doc__ = pysparkfuncs.hypot.__doc__
+
+
+def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = None) -> Column:
+    if arg2 is None:
+        # in this case, arg1 should be "ColumnOrName"
+        return _invoke_function("ln", _to_col(cast("ColumnOrName", arg1)))
+    else:
+        # in this case, arg1 should be a float
+        return _invoke_function("log", lit(cast(float, arg1)), _to_col(arg2))
+
+
+log.__doc__ = pysparkfuncs.log.__doc__
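The single- and two-argument forms of `log` above dispatch to different SQL functions; a quick sketch:

    from pyspark.sql import functions as F

    F.log("price")        # one argument: natural log, ln(price)
    F.log(2.0, "price")   # two arguments: log base 2 of price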
+
+
+def log10(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("log10", col)
+
+
+log10.__doc__ = pysparkfuncs.log10.__doc__
+
+
+def log1p(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("log1p", col)
+
+
+log1p.__doc__ = pysparkfuncs.log1p.__doc__
+
+
+def ln(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("ln", col)
+
+
+ln.__doc__ = pysparkfuncs.ln.__doc__
+
+
+def log2(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("log2", col)
+
+
+log2.__doc__ = pysparkfuncs.log2.__doc__
+
+
+def negative(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("negative", col)
+
+
+negative.__doc__ = pysparkfuncs.negative.__doc__
+
+
+negate = negative
+
+
+def pi() -> Column:
+    return _invoke_function("pi")
+
+
+pi.__doc__ = pysparkfuncs.pi.__doc__
+
+
+def positive(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("positive", col)
+
+
+positive.__doc__ = pysparkfuncs.positive.__doc__
+
+
+def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column:
+    return _invoke_binary_math_function("pmod", dividend, divisor)
+
+
+pmod.__doc__ = pysparkfuncs.pmod.__doc__
+
+
+def width_bucket(
+    v: "ColumnOrName",
+    min: "ColumnOrName",
+    max: "ColumnOrName",
+    numBucket: Union["ColumnOrName", int],
+) -> Column:
+    numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket
+    return _invoke_function_over_columns("width_bucket", v, min, max, numBucket)
+
+
+width_bucket.__doc__ = pysparkfuncs.width_bucket.__doc__
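A worked sketch of the bucketing arithmetic: for n equal-width buckets over [min, max), values below min map to 0 and values at or above max map to n + 1:

    from pyspark.sql import functions as F

    # 5 buckets over [0, 100), each of width 20
    F.width_bucket(F.lit(37), F.lit(0), F.lit(100), 5)   # -> 2, since 37 lies in [20, 40)
    F.width_bucket(F.lit(-1), F.lit(0), F.lit(100), 5)   # -> 0, below the range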
+
+
+def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
+    return _invoke_binary_math_function("power", col1, col2)
+
+
+pow.__doc__ = pysparkfuncs.pow.__doc__
+
+
+def radians(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("radians", col)
+
+
+radians.__doc__ = pysparkfuncs.radians.__doc__
+
+
+def rint(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("rint", col)
+
+
+rint.__doc__ = pysparkfuncs.rint.__doc__
+
+
+def round(col: "ColumnOrName", scale: int = 0) -> Column:
+    return _invoke_function("round", _to_col(col), lit(scale))
+
+
+round.__doc__ = pysparkfuncs.round.__doc__
+
+
+def sec(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("sec", col)
+
+
+sec.__doc__ = pysparkfuncs.sec.__doc__
+
+
+def shiftLeft(col: "ColumnOrName", numBits: int) -> Column:
+    warnings.warn("Deprecated in 3.4, use shiftleft instead.", FutureWarning)
+    return shiftleft(col, numBits)
+
+
+shiftLeft.__doc__ = pysparkfuncs.shiftLeft.__doc__
+
+
+def shiftleft(col: "ColumnOrName", numBits: int) -> Column:
+    return _invoke_function("shiftleft", _to_col(col), lit(numBits))
+
+
+shiftleft.__doc__ = pysparkfuncs.shiftleft.__doc__
+
+
+def shiftRight(col: "ColumnOrName", numBits: int) -> Column:
+    warnings.warn("Deprecated in 3.4, use shiftright instead.", FutureWarning)
+    return shiftright(col, numBits)
+
+
+shiftRight.__doc__ = pysparkfuncs.shiftRight.__doc__
+
+
+def shiftright(col: "ColumnOrName", numBits: int) -> Column:
+    return _invoke_function("shiftright", _to_col(col), lit(numBits))
+
+
+shiftright.__doc__ = pysparkfuncs.shiftright.__doc__
+
+
+def shiftRightUnsigned(col: "ColumnOrName", numBits: int) -> Column:
+    warnings.warn("Deprecated in 3.4, use shiftrightunsigned instead.", FutureWarning)
+    return shiftrightunsigned(col, numBits)
+
+
+shiftRightUnsigned.__doc__ = pysparkfuncs.shiftRightUnsigned.__doc__
+
+
+def shiftrightunsigned(col: "ColumnOrName", numBits: int) -> Column:
+    return _invoke_function("shiftrightunsigned", _to_col(col), lit(numBits))
+
+
+shiftrightunsigned.__doc__ = pysparkfuncs.shiftrightunsigned.__doc__
+
+
+def signum(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("signum", col)
+
+
+signum.__doc__ = pysparkfuncs.signum.__doc__
+
+
+def sign(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("sign", col)
+
+
+sign.__doc__ = pysparkfuncs.sign.__doc__
+
+
+def sin(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("sin", col)
+
+
+sin.__doc__ = pysparkfuncs.sin.__doc__
+
+
+def sinh(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("sinh", col)
+
+
+sinh.__doc__ = pysparkfuncs.sinh.__doc__
+
+
+def sqrt(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("sqrt", col)
+
+
+sqrt.__doc__ = pysparkfuncs.sqrt.__doc__
+
+
+def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("try_add", left, right)
+
+
+try_add.__doc__ = pysparkfuncs.try_add.__doc__
+
+
+def try_avg(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("try_avg", col)
+
+
+try_avg.__doc__ = pysparkfuncs.try_avg.__doc__
+
+
+def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("try_divide", left, right)
+
+
+try_divide.__doc__ = pysparkfuncs.try_divide.__doc__
+
+
+def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("try_multiply", left, right)
+
+
+try_multiply.__doc__ = pysparkfuncs.try_multiply.__doc__
+
+
+def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("try_subtract", left, right)
+
+
+try_subtract.__doc__ = pysparkfuncs.try_subtract.__doc__
+
+
+def try_sum(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("try_sum", col)
+
+
+try_sum.__doc__ = pysparkfuncs.try_sum.__doc__
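The try_* variants above return NULL where their plain counterparts would raise under ANSI semantics (overflow, division by zero). A brief sketch:

    from pyspark.sql import functions as F

    F.try_divide(F.lit(1), F.lit(0))                                # NULL instead of an error
    F.try_add(F.lit(2147483647).cast("int"), F.lit(1).cast("int"))  # NULL on int overflow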
+
+
+def tan(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("tan", col)
+
+
+tan.__doc__ = pysparkfuncs.tan.__doc__
+
+
+def tanh(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("tanh", col)
+
+
+tanh.__doc__ = pysparkfuncs.tanh.__doc__
+
+
+def toDegrees(col: "ColumnOrName") -> Column:
+    warnings.warn("Deprecated in 3.4, use degrees instead.", FutureWarning)
+    return degrees(col)
+
+
+toDegrees.__doc__ = pysparkfuncs.toDegrees.__doc__
+
+
+def toRadians(col: "ColumnOrName") -> Column:
+    warnings.warn("Deprecated in 3.4, use radians instead.", FutureWarning)
+    return radians(col)
+
+
+toRadians.__doc__ = pysparkfuncs.toRadians.__doc__
+
+
+def unhex(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("unhex", col)
+
+
+unhex.__doc__ = pysparkfuncs.unhex.__doc__
+
+
+def approxCountDistinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Column:
+    warnings.warn("Deprecated in 3.4, use approx_count_distinct instead.", FutureWarning)
+    return approx_count_distinct(col, rsd)
+
+
+approxCountDistinct.__doc__ = pysparkfuncs.approxCountDistinct.__doc__
+
+
+def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Column:
+    if rsd is None:
+        return _invoke_function("approx_count_distinct", _to_col(col))
+    else:
+        return _invoke_function("approx_count_distinct", _to_col(col), lit(rsd))
+
+
+approx_count_distinct.__doc__ = pysparkfuncs.approx_count_distinct.__doc__
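In the two-argument form above, `rsd` is the maximum allowed relative standard deviation of the estimate (the default is 0.05); a usage sketch with a hypothetical `user_id` column:

    df.agg(F.approx_count_distinct("user_id"))             # default precision
    df.agg(F.approx_count_distinct("user_id", rsd=0.01))   # tighter estimate, more memory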
+
+
+def avg(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("avg", col)
+
+
+avg.__doc__ = pysparkfuncs.avg.__doc__
+
+
+def collect_list(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("collect_list", col)
+
+
+collect_list.__doc__ = pysparkfuncs.collect_list.__doc__
+
+
+def array_agg(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("array_agg", col)
+
+
+array_agg.__doc__ = pysparkfuncs.array_agg.__doc__
+
+
+def collect_set(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("collect_set", col)
+
+
+collect_set.__doc__ = pysparkfuncs.collect_set.__doc__
+
+
+def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("corr", col1, col2)
+
+
+corr.__doc__ = pysparkfuncs.corr.__doc__
+
+
+def count(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("count", col)
+
+
+count.__doc__ = pysparkfuncs.count.__doc__
+
+
+def countDistinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column:
+    return count_distinct(col, *cols)
+
+
+countDistinct.__doc__ = pysparkfuncs.countDistinct.__doc__
+
+
+def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column:
+    _exprs = [_to_col(c)._expr for c in [col] + list(cols)]
+    return Column(UnresolvedFunction("count", _exprs, is_distinct=True))
+
+
+count_distinct.__doc__ = pysparkfuncs.count_distinct.__doc__
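Unlike the thin wrappers elsewhere in this file, `count_distinct` builds the UnresolvedFunction directly so it can set `is_distinct=True`; a usage sketch:

    df.agg(F.count_distinct("a"))        # COUNT(DISTINCT a)
    df.agg(F.count_distinct("a", "b"))   # COUNT(DISTINCT a, b)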
+
+
+def covar_pop(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("covar_pop", col1, col2)
+
+
+covar_pop.__doc__ = pysparkfuncs.covar_pop.__doc__
+
+
+def covar_samp(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("covar_samp", col1, col2)
+
+
+covar_samp.__doc__ = pysparkfuncs.covar_samp.__doc__
+
+
+def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column:
+    return _invoke_function("first", _to_col(col), lit(ignorenulls))
+
+
+first.__doc__ = pysparkfuncs.first.__doc__
+
+
+def grouping(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("grouping", col)
+
+
+grouping.__doc__ = pysparkfuncs.grouping.__doc__
+
+
+def grouping_id(*cols: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("grouping_id", *cols)
+
+
+grouping_id.__doc__ = pysparkfuncs.grouping_id.__doc__
+
+
+def count_min_sketch(
+    col: "ColumnOrName",
+    eps: "ColumnOrName",
+    confidence: "ColumnOrName",
+    seed: "ColumnOrName",
+) -> Column:
+    return _invoke_function_over_columns("count_min_sketch", col, eps, confidence, seed)
+
+
+count_min_sketch.__doc__ = pysparkfuncs.count_min_sketch.__doc__
+
+
+def kurtosis(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("kurtosis", col)
+
+
+kurtosis.__doc__ = pysparkfuncs.kurtosis.__doc__
+
+
+def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column:
+    return _invoke_function("last", _to_col(col), lit(ignorenulls))
+
+
+last.__doc__ = pysparkfuncs.last.__doc__
+
+
+def max(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("max", col)
+
+
+max.__doc__ = pysparkfuncs.max.__doc__
+
+
+def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("max_by", col, ord)
+
+
+max_by.__doc__ = pysparkfuncs.max_by.__doc__
+
+
+def mean(col: "ColumnOrName") -> Column:
+    return avg(col)
+
+
+mean.__doc__ = pysparkfuncs.mean.__doc__
+
+
+def median(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("median", col)
+
+
+median.__doc__ = pysparkfuncs.median.__doc__
+
+
+def min(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("min", col)
+
+
+min.__doc__ = pysparkfuncs.min.__doc__
+
+
+def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("min_by", col, ord)
+
+
+min_by.__doc__ = pysparkfuncs.min_by.__doc__
+
+
+def mode(col: "ColumnOrName") -> Column:
+    return _invoke_function_over_columns("mode", col)
+
+
+mode.__doc__ = pysparkfuncs.mode.__doc__
+
+
+def percentile(
+    col: "ColumnOrName",
+    percentage: Union[Column, float, List[float], Tuple[float]],
+    frequency: Union[Column, int] = 1,
+) -> Column:
+    if isinstance(percentage, Column):
+        _percentage = percentage
+    elif isinstance(percentage, (list, tuple)):
+        # Convert tuple to list
+        _percentage = lit(list(percentage))
+    else:
+        # Probably scalar
+        _percentage = lit(percentage)
+
+    if isinstance(frequency, int):
+        _frequency = lit(frequency)
+    elif isinstance(frequency, Column):
+        _frequency = frequency
+    else:
+        raise PySparkTypeError(
+            error_class="NOT_COLUMN_OR_INT",
+            message_parameters={
+                "arg_name": "frequency",
+                "arg_type": type(frequency).__name__,
+            },
+        )
+
+    return _invoke_function("percentile", _to_col(col), _percentage, _frequency)
+
+
+percentile.__doc__ = pysparkfuncs.percentile.__doc__
1153
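+
+ # Usage sketch (illustrative; `df` and a numeric column "v" are assumed).
+ # A list or tuple of percentages becomes one array literal, so a single
+ # call yields an array of results:
+ #
+ #     df.select(percentile("v", 0.5))
+ #     df.select(percentile("v", [0.25, 0.5, 0.75]))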
+
1154
+
1155
+ def percentile_approx(
1156
+ col: "ColumnOrName",
1157
+ percentage: Union[Column, float, List[float], Tuple[float]],
1158
+ accuracy: Union[Column, float] = 10000,
1159
+ ) -> Column:
1160
+ if isinstance(percentage, Column):
1161
+ percentage_col = percentage
1162
+ elif isinstance(percentage, (list, tuple)):
1163
+ # Convert tuple to list
1164
+ percentage_col = lit(list(percentage))
1165
+ else:
1166
+ # Probably scalar
1167
+ percentage_col = lit(percentage)
1168
+
1169
+ return _invoke_function("percentile_approx", _to_col(col), percentage_col, lit(accuracy))
1170
+
1171
+
1172
+ percentile_approx.__doc__ = pysparkfuncs.percentile_approx.__doc__
1173
+
1174
+
1175
+ def approx_percentile(
1176
+ col: "ColumnOrName",
1177
+ percentage: Union[Column, float, List[float], Tuple[float]],
1178
+ accuracy: Union[Column, float] = 10000,
1179
+ ) -> Column:
1180
+ if isinstance(percentage, Column):
1181
+ percentage_col = percentage
1182
+ elif isinstance(percentage, (list, tuple)):
1183
+ # Convert tuple to list
1184
+ percentage_col = lit(list(percentage))
1185
+ else:
1186
+ # Probably scalar
1187
+ percentage_col = lit(percentage)
1188
+
1189
+ return _invoke_function("approx_percentile", _to_col(col), percentage_col, lit(accuracy))
1190
+
1191
+
1192
+ approx_percentile.__doc__ = pysparkfuncs.approx_percentile.__doc__
1193
+
1194
+
1195
+ def product(col: "ColumnOrName") -> Column:
1196
+ return _invoke_function_over_columns("product", col)
1197
+
1198
+
1199
+ product.__doc__ = pysparkfuncs.product.__doc__
1200
+
1201
+
1202
+ def skewness(col: "ColumnOrName") -> Column:
1203
+ return _invoke_function_over_columns("skewness", col)
1204
+
1205
+
1206
+ skewness.__doc__ = pysparkfuncs.skewness.__doc__
1207
+
1208
+
1209
+ def stddev(col: "ColumnOrName") -> Column:
1210
+ return _invoke_function_over_columns("stddev", col)
1211
+
1212
+
1213
+ stddev.__doc__ = pysparkfuncs.stddev.__doc__
1214
+
1215
+
1216
+ def std(col: "ColumnOrName") -> Column:
1217
+ return _invoke_function_over_columns("std", col)
1218
+
1219
+
1220
+ std.__doc__ = pysparkfuncs.std.__doc__
1221
+
1222
+
1223
+ def stddev_samp(col: "ColumnOrName") -> Column:
1224
+ return _invoke_function_over_columns("stddev_samp", col)
1225
+
1226
+
1227
+ stddev_samp.__doc__ = pysparkfuncs.stddev_samp.__doc__
1228
+
1229
+
1230
+ def stddev_pop(col: "ColumnOrName") -> Column:
1231
+ return _invoke_function_over_columns("stddev_pop", col)
1232
+
1233
+
1234
+ stddev_pop.__doc__ = pysparkfuncs.stddev_pop.__doc__
1235
+
1236
+
1237
+ def sum(col: "ColumnOrName") -> Column:
1238
+ return _invoke_function_over_columns("sum", col)
1239
+
1240
+
1241
+ sum.__doc__ = pysparkfuncs.sum.__doc__
1242
+
1243
+
1244
+ def sumDistinct(col: "ColumnOrName") -> Column:
1245
+ warnings.warn("Deprecated in 3.4, use sum_distinct instead.", FutureWarning)
1246
+ return sum_distinct(col)
1247
+
1248
+
1249
+ sumDistinct.__doc__ = pysparkfuncs.sumDistinct.__doc__
1250
+
1251
+
1252
+ def sum_distinct(col: "ColumnOrName") -> Column:
1253
+ return Column(UnresolvedFunction("sum", [_to_col(col)._expr], is_distinct=True))
1254
+
1255
+
1256
+ sum_distinct.__doc__ = pysparkfuncs.sum_distinct.__doc__
1257
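+
+ # Usage sketch (illustrative). As with count_distinct, this reuses the
+ # plain "sum" function name and sets the DISTINCT flag on the expression:
+ #
+ #     df.select(sum_distinct(df.amount))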
+
1258
+
1259
+ def var_pop(col: "ColumnOrName") -> Column:
1260
+ return _invoke_function_over_columns("var_pop", col)
1261
+
1262
+
1263
+ var_pop.__doc__ = pysparkfuncs.var_pop.__doc__
1264
+
1265
+
1266
+ def regr_avgx(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1267
+ return _invoke_function_over_columns("regr_avgx", y, x)
1268
+
1269
+
1270
+ regr_avgx.__doc__ = pysparkfuncs.regr_avgx.__doc__
1271
+
1272
+
1273
+ def regr_avgy(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1274
+ return _invoke_function_over_columns("regr_avgy", y, x)
1275
+
1276
+
1277
+ regr_avgy.__doc__ = pysparkfuncs.regr_avgy.__doc__
1278
+
1279
+
1280
+ def regr_count(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1281
+ return _invoke_function_over_columns("regr_count", y, x)
1282
+
1283
+
1284
+ regr_count.__doc__ = pysparkfuncs.regr_count.__doc__
1285
+
1286
+
1287
+ def regr_intercept(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1288
+ return _invoke_function_over_columns("regr_intercept", y, x)
1289
+
1290
+
1291
+ regr_intercept.__doc__ = pysparkfuncs.regr_intercept.__doc__
1292
+
1293
+
1294
+ def regr_r2(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1295
+ return _invoke_function_over_columns("regr_r2", y, x)
1296
+
1297
+
1298
+ regr_r2.__doc__ = pysparkfuncs.regr_r2.__doc__
1299
+
1300
+
1301
+ def regr_slope(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1302
+ return _invoke_function_over_columns("regr_slope", y, x)
1303
+
1304
+
1305
+ regr_slope.__doc__ = pysparkfuncs.regr_slope.__doc__
1306
+
1307
+
1308
+ def regr_sxx(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1309
+ return _invoke_function_over_columns("regr_sxx", y, x)
1310
+
1311
+
1312
+ regr_sxx.__doc__ = pysparkfuncs.regr_sxx.__doc__
1313
+
1314
+
1315
+ def regr_sxy(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1316
+ return _invoke_function_over_columns("regr_sxy", y, x)
1317
+
1318
+
1319
+ regr_sxy.__doc__ = pysparkfuncs.regr_sxy.__doc__
1320
+
1321
+
1322
+ def regr_syy(y: "ColumnOrName", x: "ColumnOrName") -> Column:
1323
+ return _invoke_function_over_columns("regr_syy", y, x)
1324
+
1325
+
1326
+ regr_syy.__doc__ = pysparkfuncs.regr_syy.__doc__
1327
+
1328
+
1329
+ def var_samp(col: "ColumnOrName") -> Column:
1330
+ return _invoke_function_over_columns("var_samp", col)
1331
+
1332
+
1333
+ var_samp.__doc__ = pysparkfuncs.var_samp.__doc__
1334
+
1335
+
1336
+ def variance(col: "ColumnOrName") -> Column:
1337
+ return var_samp(col)
1338
+
1339
+
1340
+ variance.__doc__ = pysparkfuncs.variance.__doc__
1341
+
1342
+
1343
+ def every(col: "ColumnOrName") -> Column:
1344
+ return _invoke_function_over_columns("every", col)
1345
+
1346
+
1347
+ every.__doc__ = pysparkfuncs.every.__doc__
1348
+
1349
+
1350
+ def bool_and(col: "ColumnOrName") -> Column:
1351
+ return _invoke_function_over_columns("bool_and", col)
1352
+
1353
+
1354
+ bool_and.__doc__ = pysparkfuncs.bool_and.__doc__
1355
+
1356
+
1357
+ def some(col: "ColumnOrName") -> Column:
1358
+ return _invoke_function_over_columns("some", col)
1359
+
1360
+
1361
+ some.__doc__ = pysparkfuncs.some.__doc__
1362
+
1363
+
1364
+ def bool_or(col: "ColumnOrName") -> Column:
1365
+ return _invoke_function_over_columns("bool_or", col)
1366
+
1367
+
1368
+ bool_or.__doc__ = pysparkfuncs.bool_or.__doc__
1369
+
1370
+
1371
+ def bit_and(col: "ColumnOrName") -> Column:
1372
+ return _invoke_function_over_columns("bit_and", col)
1373
+
1374
+
1375
+ bit_and.__doc__ = pysparkfuncs.bit_and.__doc__
1376
+
1377
+
1378
+ def bit_or(col: "ColumnOrName") -> Column:
1379
+ return _invoke_function_over_columns("bit_or", col)
1380
+
1381
+
1382
+ bit_or.__doc__ = pysparkfuncs.bit_or.__doc__
1383
+
1384
+
1385
+ def bit_xor(col: "ColumnOrName") -> Column:
1386
+ return _invoke_function_over_columns("bit_xor", col)
1387
+
1388
+
1389
+ bit_xor.__doc__ = pysparkfuncs.bit_xor.__doc__
1390
+
1391
+
1392
+ # Window Functions
1393
+
1394
+
1395
+ def cume_dist() -> Column:
1396
+ return _invoke_function("cume_dist")
1397
+
1398
+
1399
+ cume_dist.__doc__ = pysparkfuncs.cume_dist.__doc__
1400
+
1401
+
1402
+ def dense_rank() -> Column:
1403
+ return _invoke_function("dense_rank")
1404
+
1405
+
1406
+ dense_rank.__doc__ = pysparkfuncs.dense_rank.__doc__
1407
+
1408
+
1409
+ def lag(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column:
1410
+ if default is None:
1411
+ return _invoke_function("lag", _to_col(col), lit(offset))
1412
+ else:
1413
+ return _invoke_function("lag", _to_col(col), lit(offset), lit(default))
1414
+
1415
+
1416
+ lag.__doc__ = pysparkfuncs.lag.__doc__
1417
+
1418
+
1419
+ def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column:
1420
+ if default is None:
1421
+ return _invoke_function("lead", _to_col(col), lit(offset))
1422
+ else:
1423
+ return _invoke_function("lead", _to_col(col), lit(offset), lit(default))
1424
+
1425
+
1426
+ lead.__doc__ = pysparkfuncs.lead.__doc__
1427
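+
+ # Usage sketch (illustrative; assumes `from pyspark.sql.window import Window`
+ # and a DataFrame `df` with columns "k", "ts", "v"). Both are window
+ # functions and must be applied over a window spec:
+ #
+ #     w = Window.partitionBy("k").orderBy("ts")
+ #     df.select(lag("v", 1, default=0).over(w), lead("v", 1).over(w))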
+
1428
+
1429
+ def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = None) -> Column:
1430
+ if ignoreNulls is None:
1431
+ return _invoke_function("nth_value", _to_col(col), lit(offset))
1432
+ else:
1433
+ return _invoke_function("nth_value", _to_col(col), lit(offset), lit(ignoreNulls))
1434
+
1435
+
1436
+ nth_value.__doc__ = pysparkfuncs.nth_value.__doc__
1437
+
1438
+
1439
+ def any_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column:
1440
+ if ignoreNulls is None:
1441
+ return _invoke_function_over_columns("any_value", col)
1442
+ else:
1443
+ ignoreNulls = lit(ignoreNulls) if isinstance(ignoreNulls, bool) else ignoreNulls
1444
+ return _invoke_function_over_columns("any_value", col, ignoreNulls)
1445
+
1446
+
1447
+ any_value.__doc__ = pysparkfuncs.any_value.__doc__
1448
+
1449
+
1450
+ def first_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column:
1451
+ if ignoreNulls is None:
1452
+ return _invoke_function_over_columns("first_value", col)
1453
+ else:
1454
+ ignoreNulls = lit(ignoreNulls) if isinstance(ignoreNulls, bool) else ignoreNulls
1455
+ return _invoke_function_over_columns("first_value", col, ignoreNulls)
1456
+
1457
+
1458
+ first_value.__doc__ = pysparkfuncs.first_value.__doc__
1459
+
1460
+
1461
+ def last_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column:
1462
+ if ignoreNulls is None:
1463
+ return _invoke_function_over_columns("last_value", col)
1464
+ else:
1465
+ ignoreNulls = lit(ignoreNulls) if isinstance(ignoreNulls, bool) else ignoreNulls
1466
+ return _invoke_function_over_columns("last_value", col, ignoreNulls)
1467
+
1468
+
1469
+ last_value.__doc__ = pysparkfuncs.last_value.__doc__
1470
+
1471
+
1472
+ def count_if(col: "ColumnOrName") -> Column:
1473
+ return _invoke_function_over_columns("count_if", col)
1474
+
1475
+
1476
+ count_if.__doc__ = pysparkfuncs.count_if.__doc__
1477
+
1478
+
1479
+ def histogram_numeric(col: "ColumnOrName", nBins: "ColumnOrName") -> Column:
1480
+ return _invoke_function_over_columns("histogram_numeric", col, nBins)
1481
+
1482
+
1483
+ histogram_numeric.__doc__ = pysparkfuncs.histogram_numeric.__doc__
1484
+
1485
+
1486
+ def ntile(n: int) -> Column:
1487
+ return _invoke_function("ntile", lit(n))
1488
+
1489
+
1490
+ ntile.__doc__ = pysparkfuncs.ntile.__doc__
1491
+
1492
+
1493
+ def percent_rank() -> Column:
1494
+ return _invoke_function("percent_rank")
1495
+
1496
+
1497
+ percent_rank.__doc__ = pysparkfuncs.percent_rank.__doc__
1498
+
1499
+
1500
+ def rank() -> Column:
1501
+ return _invoke_function("rank")
1502
+
1503
+
1504
+ rank.__doc__ = pysparkfuncs.rank.__doc__
1505
+
1506
+
1507
+ def row_number() -> Column:
1508
+ return _invoke_function("row_number")
1509
+
1510
+
1511
+ row_number.__doc__ = pysparkfuncs.row_number.__doc__
1512
+
1513
+
1514
+ def aggregate(
1515
+ col: "ColumnOrName",
1516
+ initialValue: "ColumnOrName",
1517
+ merge: Callable[[Column, Column], Column],
1518
+ finish: Optional[Callable[[Column], Column]] = None,
1519
+ ) -> Column:
1520
+ if finish is not None:
1521
+ return _invoke_higher_order_function("aggregate", [col, initialValue], [merge, finish])
1522
+
1523
+ else:
1524
+ return _invoke_higher_order_function("aggregate", [col, initialValue], [merge])
1525
+
1526
+
1527
+ aggregate.__doc__ = pysparkfuncs.aggregate.__doc__
1528
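+
+ # Usage sketch (illustrative; assumes an array column "xs"). The `merge`
+ # and optional `finish` callables are translated into lambda expressions by
+ # _invoke_higher_order_function; `finish` post-processes the final
+ # accumulator:
+ #
+ #     df.select(aggregate("xs", lit(0.0), lambda acc, x: acc + x))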
+
1529
+
1530
+ def reduce(
1531
+ col: "ColumnOrName",
1532
+ initialValue: "ColumnOrName",
1533
+ merge: Callable[[Column, Column], Column],
1534
+ finish: Optional[Callable[[Column], Column]] = None,
1535
+ ) -> Column:
1536
+ if finish is not None:
1537
+ return _invoke_higher_order_function("reduce", [col, initialValue], [merge, finish])
1538
+
1539
+ else:
1540
+ return _invoke_higher_order_function("reduce", [col, initialValue], [merge])
1541
+
1542
+
1543
+ reduce.__doc__ = pysparkfuncs.reduce.__doc__
1544
+
1545
+
1546
+ def array(*cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]) -> Column:
1547
+ if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
1548
+ cols = cols[0] # type: ignore[assignment]
1549
+ return _invoke_function_over_columns("array", *cols) # type: ignore[arg-type]
1550
+
1551
+
1552
+ array.__doc__ = pysparkfuncs.array.__doc__
1553
+
1554
+
1555
+ def array_append(col: "ColumnOrName", value: Any) -> Column:
1556
+ return _invoke_function("array_append", _to_col(col), lit(value))
1557
+
1558
+
1559
+ array_append.__doc__ = pysparkfuncs.array_append.__doc__
1560
+
1561
+
1562
+ def array_contains(col: "ColumnOrName", value: Any) -> Column:
1563
+ return _invoke_function("array_contains", _to_col(col), lit(value))
1564
+
1565
+
1566
+ array_contains.__doc__ = pysparkfuncs.array_contains.__doc__
1567
+
1568
+
1569
+ def array_distinct(col: "ColumnOrName") -> Column:
1570
+ return _invoke_function_over_columns("array_distinct", col)
1571
+
1572
+
1573
+ array_distinct.__doc__ = pysparkfuncs.array_distinct.__doc__
1574
+
1575
+
1576
+ def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1577
+ return _invoke_function_over_columns("array_except", col1, col2)
1578
+
1579
+
1580
+ array_except.__doc__ = pysparkfuncs.array_except.__doc__
1581
+
1582
+
1583
+ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: Any) -> Column:
1584
+ _pos = lit(pos) if isinstance(pos, int) else _to_col(pos)
1585
+ return _invoke_function("array_insert", _to_col(arr), _pos, lit(value))
1586
+
1587
+
1588
+ array_insert.__doc__ = pysparkfuncs.array_insert.__doc__
1589
+
1590
+
1591
+ def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1592
+ return _invoke_function_over_columns("array_intersect", col1, col2)
1593
+
1594
+
1595
+ array_intersect.__doc__ = pysparkfuncs.array_intersect.__doc__
1596
+
1597
+
1598
+ def array_compact(col: "ColumnOrName") -> Column:
1599
+ return _invoke_function_over_columns("array_compact", col)
1600
+
1601
+
1602
+ array_compact.__doc__ = pysparkfuncs.array_compact.__doc__
1603
+
1604
+
1605
+ def array_join(
1606
+ col: "ColumnOrName", delimiter: str, null_replacement: Optional[str] = None
1607
+ ) -> Column:
1608
+ if null_replacement is None:
1609
+ return _invoke_function("array_join", _to_col(col), lit(delimiter))
1610
+ else:
1611
+ return _invoke_function("array_join", _to_col(col), lit(delimiter), lit(null_replacement))
1612
+
1613
+
1614
+ array_join.__doc__ = pysparkfuncs.array_join.__doc__
1615
+
1616
+
1617
+ def array_max(col: "ColumnOrName") -> Column:
1618
+ return _invoke_function_over_columns("array_max", col)
1619
+
1620
+
1621
+ array_max.__doc__ = pysparkfuncs.array_max.__doc__
1622
+
1623
+
1624
+ def array_min(col: "ColumnOrName") -> Column:
1625
+ return _invoke_function_over_columns("array_min", col)
1626
+
1627
+
1628
+ array_min.__doc__ = pysparkfuncs.array_min.__doc__
1629
+
1630
+
1631
+ def array_size(col: "ColumnOrName") -> Column:
1632
+ return _invoke_function_over_columns("array_size", col)
1633
+
1634
+
1635
+ array_size.__doc__ = pysparkfuncs.array_size.__doc__
1636
+
1637
+
1638
+ def cardinality(col: "ColumnOrName") -> Column:
1639
+ return _invoke_function_over_columns("cardinality", col)
1640
+
1641
+
1642
+ cardinality.__doc__ = pysparkfuncs.cardinality.__doc__
1643
+
1644
+
1645
+ def array_position(col: "ColumnOrName", value: Any) -> Column:
1646
+ return _invoke_function("array_position", _to_col(col), lit(value))
1647
+
1648
+
1649
+ array_position.__doc__ = pysparkfuncs.array_position.__doc__
1650
+
1651
+
1652
+ def array_prepend(col: "ColumnOrName", value: Any) -> Column:
1653
+ return _invoke_function("array_prepend", _to_col(col), lit(value))
1654
+
1655
+
1656
+ array_prepend.__doc__ = pysparkfuncs.array_prepend.__doc__
1657
+
1658
+
1659
+ def array_remove(col: "ColumnOrName", element: Any) -> Column:
1660
+ return _invoke_function("array_remove", _to_col(col), lit(element))
1661
+
1662
+
1663
+ array_remove.__doc__ = pysparkfuncs.array_remove.__doc__
1664
+
1665
+
1666
+ def array_repeat(col: "ColumnOrName", count: Union["ColumnOrName", int]) -> Column:
1667
+ _count = lit(count) if isinstance(count, int) else _to_col(count)
1668
+ return _invoke_function("array_repeat", _to_col(col), _count)
1669
+
1670
+
1671
+ array_repeat.__doc__ = pysparkfuncs.array_repeat.__doc__
1672
+
1673
+
1674
+ def array_sort(
1675
+ col: "ColumnOrName", comparator: Optional[Callable[[Column, Column], Column]] = None
1676
+ ) -> Column:
1677
+ if comparator is None:
1678
+ return _invoke_function_over_columns("array_sort", col)
1679
+ else:
1680
+ return _invoke_higher_order_function("array_sort", [col], [comparator])
1681
+
1682
+
1683
+ array_sort.__doc__ = pysparkfuncs.array_sort.__doc__
1684
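+
+ # Usage sketch (illustrative; assumes an array-of-strings column "xs").
+ # With no comparator this is a plain function call; with one, it becomes a
+ # higher-order function whose comparator returns a negative, zero, or
+ # positive integer column:
+ #
+ #     df.select(array_sort("xs"))
+ #     df.select(array_sort("xs", lambda a, b: length(a) - length(b)))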
+
1685
+
1686
+ def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1687
+ return _invoke_function_over_columns("array_union", col1, col2)
1688
+
1689
+
1690
+ array_union.__doc__ = pysparkfuncs.array_union.__doc__
1691
+
1692
+
1693
+ def arrays_overlap(a1: "ColumnOrName", a2: "ColumnOrName") -> Column:
1694
+ return _invoke_function_over_columns("arrays_overlap", a1, a2)
1695
+
1696
+
1697
+ arrays_overlap.__doc__ = pysparkfuncs.arrays_overlap.__doc__
1698
+
1699
+
1700
+ def arrays_zip(*cols: "ColumnOrName") -> Column:
1701
+ return _invoke_function_over_columns("arrays_zip", *cols)
1702
+
1703
+
1704
+ arrays_zip.__doc__ = pysparkfuncs.arrays_zip.__doc__
1705
+
1706
+
1707
+ def concat(*cols: "ColumnOrName") -> Column:
1708
+ return _invoke_function_over_columns("concat", *cols)
1709
+
1710
+
1711
+ concat.__doc__ = pysparkfuncs.concat.__doc__
1712
+
1713
+
1714
+ def create_map(
1715
+ *cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]
1716
+ ) -> Column:
1717
+ if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
1718
+ cols = cols[0] # type: ignore[assignment]
1719
+ return _invoke_function_over_columns("map", *cols) # type: ignore[arg-type]
1720
+
1721
+
1722
+ create_map.__doc__ = pysparkfuncs.create_map.__doc__
1723
+
1724
+
1725
+ def element_at(col: "ColumnOrName", extraction: Any) -> Column:
1726
+ return _invoke_function("element_at", _to_col(col), lit(extraction))
1727
+
1728
+
1729
+ element_at.__doc__ = pysparkfuncs.element_at.__doc__
1730
+
1731
+
1732
+ def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column:
1733
+ return _invoke_function_over_columns("try_element_at", col, extraction)
1734
+
1735
+
1736
+ try_element_at.__doc__ = pysparkfuncs.try_element_at.__doc__
1737
+
1738
+
1739
+ def exists(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column:
1740
+ return _invoke_higher_order_function("exists", [col], [f])
1741
+
1742
+
1743
+ exists.__doc__ = pysparkfuncs.exists.__doc__
1744
+
1745
+
1746
+ def explode(col: "ColumnOrName") -> Column:
1747
+ return _invoke_function_over_columns("explode", col)
1748
+
1749
+
1750
+ explode.__doc__ = pysparkfuncs.explode.__doc__
1751
+
1752
+
1753
+ def explode_outer(col: "ColumnOrName") -> Column:
1754
+ return _invoke_function_over_columns("explode_outer", col)
1755
+
1756
+
1757
+ explode_outer.__doc__ = pysparkfuncs.explode_outer.__doc__
1758
+
1759
+
1760
+ def filter(
1761
+ col: "ColumnOrName",
1762
+ f: Union[Callable[[Column], Column], Callable[[Column, Column], Column]],
1763
+ ) -> Column:
1764
+ return _invoke_higher_order_function("filter", [col], [f])
1765
+
1766
+
1767
+ filter.__doc__ = pysparkfuncs.filter.__doc__
1768
+
1769
+
1770
+ def flatten(col: "ColumnOrName") -> Column:
1771
+ return _invoke_function_over_columns("flatten", col)
1772
+
1773
+
1774
+ flatten.__doc__ = pysparkfuncs.flatten.__doc__
1775
+
1776
+
1777
+ def forall(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column:
1778
+ return _invoke_higher_order_function("forall", [col], [f])
1779
+
1780
+
1781
+ forall.__doc__ = pysparkfuncs.forall.__doc__
1782
+
1783
+
1784
+ # TODO: support options
1785
+ def from_csv(
1786
+ col: "ColumnOrName",
1787
+ schema: Union[Column, str],
1788
+ options: Optional[Dict[str, str]] = None,
1789
+ ) -> Column:
1790
+ if isinstance(schema, Column):
1791
+ _schema = schema
1792
+ elif isinstance(schema, str):
1793
+ _schema = lit(schema)
1794
+ else:
1795
+ raise PySparkTypeError(
1796
+ error_class="NOT_COLUMN_OR_STR",
1797
+ message_parameters={"arg_name": "schema", "arg_type": type(schema).__name__},
1798
+ )
1799
+
1800
+ if options is None:
1801
+ return _invoke_function("from_csv", _to_col(col), _schema)
1802
+ else:
1803
+ return _invoke_function("from_csv", _to_col(col), _schema, _options_to_col(options))
1804
+
1805
+
1806
+ from_csv.__doc__ = pysparkfuncs.from_csv.__doc__
1807
+
1808
+
1809
+ def from_json(
1810
+ col: "ColumnOrName",
1811
+ schema: Union[ArrayType, StructType, Column, str],
1812
+ options: Optional[Dict[str, str]] = None,
1813
+ ) -> Column:
1814
+ if isinstance(schema, Column):
1815
+ _schema = schema
1816
+ elif isinstance(schema, DataType):
1817
+ _schema = lit(schema.json())
1818
+ elif isinstance(schema, str):
1819
+ _schema = lit(schema)
1820
+ else:
1821
+ raise PySparkTypeError(
1822
+ error_class="NOT_COLUMN_OR_DATATYPE_OR_STR",
1823
+ message_parameters={"arg_name": "schema", "arg_type": type(schema).__name__},
1824
+ )
1825
+
1826
+ if options is None:
1827
+ return _invoke_function("from_json", _to_col(col), _schema)
1828
+ else:
1829
+ return _invoke_function("from_json", _to_col(col), _schema, _options_to_col(options))
1830
+
1831
+
1832
+ from_json.__doc__ = pysparkfuncs.from_json.__doc__
1833
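+
+ # Usage sketch (illustrative; assumes a string column "js"). The schema may
+ # be a DDL string, a DataType (serialized with .json()), or a Column, and
+ # options are folded into a map column by _options_to_col:
+ #
+ #     df.select(from_json("js", "a INT, b STRING"))
+ #     df.select(from_json("js", "a INT", {"mode": "PERMISSIVE"}))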
+
1834
+
1835
+ def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
1836
+ index = lit(index) if isinstance(index, int) else index
1837
+
1838
+ return _invoke_function_over_columns("get", col, index)
1839
+
1840
+
1841
+ get.__doc__ = pysparkfuncs.get.__doc__
1842
+
1843
+
1844
+ def get_json_object(col: "ColumnOrName", path: str) -> Column:
1845
+ return _invoke_function("get_json_object", _to_col(col), lit(path))
1846
+
1847
+
1848
+ get_json_object.__doc__ = pysparkfuncs.get_json_object.__doc__
1849
+
1850
+
1851
+ def json_array_length(col: "ColumnOrName") -> Column:
1852
+ return _invoke_function_over_columns("json_array_length", col)
1853
+
1854
+
1855
+ json_array_length.__doc__ = pysparkfuncs.json_array_length.__doc__
1856
+
1857
+
1858
+ def json_object_keys(col: "ColumnOrName") -> Column:
1859
+ return _invoke_function_over_columns("json_object_keys", col)
1860
+
1861
+
1862
+ json_object_keys.__doc__ = pysparkfuncs.json_object_keys.__doc__
1863
+
1864
+
1865
+ def inline(col: "ColumnOrName") -> Column:
1866
+ return _invoke_function_over_columns("inline", col)
1867
+
1868
+
1869
+ inline.__doc__ = pysparkfuncs.inline.__doc__
1870
+
1871
+
1872
+ def inline_outer(col: "ColumnOrName") -> Column:
1873
+ return _invoke_function_over_columns("inline_outer", col)
1874
+
1875
+
1876
+ inline_outer.__doc__ = pysparkfuncs.inline_outer.__doc__
1877
+
1878
+
1879
+ def json_tuple(col: "ColumnOrName", *fields: str) -> Column:
1880
+ return _invoke_function("json_tuple", _to_col(col), *[lit(field) for field in fields])
1881
+
1882
+
1883
+ json_tuple.__doc__ = pysparkfuncs.json_tuple.__doc__
1884
+
1885
+
1886
+ def map_concat(
1887
+ *cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]
1888
+ ) -> Column:
1889
+ if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
1890
+ cols = cols[0] # type: ignore[assignment]
1891
+ return _invoke_function_over_columns("map_concat", *cols) # type: ignore[arg-type]
1892
+
1893
+
1894
+ map_concat.__doc__ = pysparkfuncs.map_concat.__doc__
1895
+
1896
+
1897
+ def map_contains_key(col: "ColumnOrName", value: Any) -> Column:
1898
+ return array_contains(map_keys(col), lit(value))
1899
+
1900
+
1901
+ map_contains_key.__doc__ = pysparkfuncs.map_contains_key.__doc__
1902
+
1903
+
1904
+ def map_entries(col: "ColumnOrName") -> Column:
1905
+ return _invoke_function_over_columns("map_entries", col)
1906
+
1907
+
1908
+ map_entries.__doc__ = pysparkfuncs.map_entries.__doc__
1909
+
1910
+
1911
+ def map_filter(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column:
1912
+ return _invoke_higher_order_function("map_filter", [col], [f])
1913
+
1914
+
1915
+ map_filter.__doc__ = pysparkfuncs.map_filter.__doc__
1916
+
1917
+
1918
+ def map_from_arrays(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
1919
+ return _invoke_function_over_columns("map_from_arrays", col1, col2)
1920
+
1921
+
1922
+ map_from_arrays.__doc__ = pysparkfuncs.map_from_arrays.__doc__
1923
+
1924
+
1925
+ def map_from_entries(col: "ColumnOrName") -> Column:
1926
+ return _invoke_function_over_columns("map_from_entries", col)
1927
+
1928
+
1929
+ map_from_entries.__doc__ = pysparkfuncs.map_from_entries.__doc__
1930
+
1931
+
1932
+ def map_keys(col: "ColumnOrName") -> Column:
1933
+ return _invoke_function_over_columns("map_keys", col)
1934
+
1935
+
1936
+ map_keys.__doc__ = pysparkfuncs.map_keys.__doc__
1937
+
1938
+
1939
+ def map_values(col: "ColumnOrName") -> Column:
1940
+ return _invoke_function_over_columns("map_values", col)
1941
+
1942
+
1943
+ map_values.__doc__ = pysparkfuncs.map_values.__doc__
1944
+
1945
+
1946
+ def map_zip_with(
1947
+ col1: "ColumnOrName",
1948
+ col2: "ColumnOrName",
1949
+ f: Callable[[Column, Column, Column], Column],
1950
+ ) -> Column:
1951
+ return _invoke_higher_order_function("map_zip_with", [col1, col2], [f])
1952
+
1953
+
1954
+ map_zip_with.__doc__ = pysparkfuncs.map_zip_with.__doc__
1955
+
1956
+
1957
+ def str_to_map(
1958
+ text: "ColumnOrName",
1959
+ pairDelim: Optional["ColumnOrName"] = None,
1960
+ keyValueDelim: Optional["ColumnOrName"] = None,
1961
+ ) -> Column:
1962
+ _pairDelim = lit(",") if pairDelim is None else _to_col(pairDelim)
1963
+ _keyValueDelim = lit(":") if keyValueDelim is None else _to_col(keyValueDelim)
1964
+
1965
+ return _invoke_function("str_to_map", _to_col(text), _pairDelim, _keyValueDelim)
1966
+
1967
+
1968
+ str_to_map.__doc__ = pysparkfuncs.str_to_map.__doc__
1969
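+
+ # Usage sketch (illustrative). The pair and key/value delimiters default
+ # to "," and ":" when omitted:
+ #
+ #     df.select(str_to_map(lit("a:1,b:2")))
+ #     df.select(str_to_map("raw", lit(";"), lit("=")))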
+
1970
+
1971
+ def posexplode(col: "ColumnOrName") -> Column:
1972
+ return _invoke_function_over_columns("posexplode", col)
1973
+
1974
+
1975
+ posexplode.__doc__ = pysparkfuncs.posexplode.__doc__
1976
+
1977
+
1978
+ def posexplode_outer(col: "ColumnOrName") -> Column:
1979
+ return _invoke_function_over_columns("posexplode_outer", col)
1980
+
1981
+
1982
+ posexplode_outer.__doc__ = pysparkfuncs.posexplode_outer.__doc__
1983
+
1984
+
1985
+ def reverse(col: "ColumnOrName") -> Column:
1986
+ return _invoke_function_over_columns("reverse", col)
1987
+
1988
+
1989
+ reverse.__doc__ = pysparkfuncs.reverse.__doc__
1990
+
1991
+
1992
+ def sequence(
1993
+ start: "ColumnOrName", stop: "ColumnOrName", step: Optional["ColumnOrName"] = None
1994
+ ) -> Column:
1995
+ if step is None:
1996
+ return _invoke_function_over_columns("sequence", start, stop)
1997
+ else:
1998
+ return _invoke_function_over_columns("sequence", start, stop, step)
1999
+
2000
+
2001
+ sequence.__doc__ = pysparkfuncs.sequence.__doc__
2002
+
2003
+
2004
+ def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
2005
+ if isinstance(csv, Column):
2006
+ _csv = csv
2007
+ elif isinstance(csv, str):
2008
+ _csv = lit(csv)
2009
+ else:
2010
+ raise PySparkTypeError(
2011
+ error_class="NOT_COLUMN_OR_STR",
2012
+ message_parameters={"arg_name": "csv", "arg_type": type(csv).__name__},
2013
+ )
2014
+
2015
+ if options is None:
2016
+ return _invoke_function("schema_of_csv", _csv)
2017
+ else:
2018
+ return _invoke_function("schema_of_csv", _csv, _options_to_col(options))
2019
+
2020
+
2021
+ schema_of_csv.__doc__ = pysparkfuncs.schema_of_csv.__doc__
2022
+
2023
+
2024
+ def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
2025
+ if isinstance(json, Column):
2026
+ _json = json
2027
+ elif isinstance(json, str):
2028
+ _json = lit(json)
2029
+ else:
2030
+ raise PySparkTypeError(
2031
+ error_class="NOT_COLUMN_OR_STR",
2032
+ message_parameters={"arg_name": "json", "arg_type": type(json).__name__},
2033
+ )
2034
+
2035
+ if options is None:
2036
+ return _invoke_function("schema_of_json", _json)
2037
+ else:
2038
+ return _invoke_function("schema_of_json", _json, _options_to_col(options))
2039
+
2040
+
2041
+ schema_of_json.__doc__ = pysparkfuncs.schema_of_json.__doc__
2042
+
2043
+
2044
+ def shuffle(col: "ColumnOrName") -> Column:
2045
+ return _invoke_function_over_columns("shuffle", col)
2046
+
2047
+
2048
+ shuffle.__doc__ = pysparkfuncs.shuffle.__doc__
2049
+
2050
+
2051
+ def size(col: "ColumnOrName") -> Column:
2052
+ return _invoke_function_over_columns("size", col)
2053
+
2054
+
2055
+ size.__doc__ = pysparkfuncs.size.__doc__
2056
+
2057
+
2058
+ def slice(
2059
+ col: "ColumnOrName", start: Union["ColumnOrName", int], length: Union["ColumnOrName", int]
2060
+ ) -> Column:
2061
+ if isinstance(start, (Column, str)):
2062
+ _start = start
2063
+ elif isinstance(start, int):
2064
+ _start = lit(start)
2065
+ else:
2066
+ raise PySparkTypeError(
2067
+ error_class="NOT_COLUMN_OR_INT_OR_STR",
2068
+ message_parameters={"arg_name": "start", "arg_type": type(start).__name__},
2069
+ )
2070
+
2071
+ if isinstance(length, (Column, str)):
2072
+ _length = length
2073
+ elif isinstance(length, int):
2074
+ _length = lit(length)
2075
+ else:
2076
+ raise PySparkTypeError(
2077
+ error_class="NOT_COLUMN_OR_INT_OR_STR",
2078
+ message_parameters={"arg_name": "length", "arg_type": type(length).__name__},
2079
+ )
2080
+
2081
+ return _invoke_function_over_columns("slice", col, _start, _length)
2082
+
2083
+
2084
+ slice.__doc__ = pysparkfuncs.slice.__doc__
2085
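+
+ # Usage sketch (illustrative; "xs" is an array column). `start` is 1-based;
+ # both `start` and `length` accept ints, column names, or Columns:
+ #
+ #     df.select(slice("xs", 2, 3))
+ #     df.select(slice("xs", lit(1), size("xs")))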
+
2086
+
2087
+ def sort_array(col: "ColumnOrName", asc: bool = True) -> Column:
2088
+ return _invoke_function("sort_array", _to_col(col), lit(asc))
2089
+
2090
+
2091
+ sort_array.__doc__ = pysparkfuncs.sort_array.__doc__
2092
+
2093
+
2094
+ def struct(
2095
+ *cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]
2096
+ ) -> Column:
2097
+ if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
2098
+ cols = cols[0] # type: ignore[assignment]
2099
+ return _invoke_function_over_columns("struct", *cols) # type: ignore[arg-type]
2100
+
2101
+
2102
+ struct.__doc__ = pysparkfuncs.struct.__doc__
2103
+
2104
+
2105
+ def named_struct(*cols: "ColumnOrName") -> Column:
2106
+ return _invoke_function_over_columns("named_struct", *cols)
2107
+
2108
+
2109
+ named_struct.__doc__ = pysparkfuncs.named_struct.__doc__
2110
+
2111
+
2112
+ def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
2113
+ if options is None:
2114
+ return _invoke_function("to_csv", _to_col(col))
2115
+ else:
2116
+ return _invoke_function("to_csv", _to_col(col), _options_to_col(options))
2117
+
2118
+
2119
+ to_csv.__doc__ = pysparkfuncs.to_csv.__doc__
2120
+
2121
+
2122
+ def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
2123
+ if options is None:
2124
+ return _invoke_function("to_json", _to_col(col))
2125
+ else:
2126
+ return _invoke_function("to_json", _to_col(col), _options_to_col(options))
2127
+
2128
+
2129
+ to_json.__doc__ = pysparkfuncs.to_json.__doc__
2130
+
2131
+
2132
+ def transform(
2133
+ col: "ColumnOrName",
2134
+ f: Union[Callable[[Column], Column], Callable[[Column, Column], Column]],
2135
+ ) -> Column:
2136
+ return _invoke_higher_order_function("transform", [col], [f])
2137
+
2138
+
2139
+ transform.__doc__ = pysparkfuncs.transform.__doc__
2140
+
2141
+
2142
+ def transform_keys(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column:
2143
+ return _invoke_higher_order_function("transform_keys", [col], [f])
2144
+
2145
+
2146
+ transform_keys.__doc__ = pysparkfuncs.transform_keys.__doc__
2147
+
2148
+
2149
+ def transform_values(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column:
2150
+ return _invoke_higher_order_function("transform_values", [col], [f])
2151
+
2152
+
2153
+ transform_values.__doc__ = pysparkfuncs.transform_values.__doc__
2154
+
2155
+
2156
+ def zip_with(
2157
+ left: "ColumnOrName",
2158
+ right: "ColumnOrName",
2159
+ f: Callable[[Column, Column], Column],
2160
+ ) -> Column:
2161
+ return _invoke_higher_order_function("zip_with", [left, right], [f])
2162
+
2163
+
2164
+ zip_with.__doc__ = pysparkfuncs.zip_with.__doc__
2165
+
2166
+
2167
+ # String/Binary functions
2168
+
2169
+
2170
+ def upper(col: "ColumnOrName") -> Column:
2171
+ return _invoke_function_over_columns("upper", col)
2172
+
2173
+
2174
+ upper.__doc__ = pysparkfuncs.upper.__doc__
2175
+
2176
+
2177
+ def lower(col: "ColumnOrName") -> Column:
2178
+ return _invoke_function_over_columns("lower", col)
2179
+
2180
+
2181
+ lower.__doc__ = pysparkfuncs.lower.__doc__
2182
+
2183
+
2184
+ def ascii(col: "ColumnOrName") -> Column:
2185
+ return _invoke_function_over_columns("ascii", col)
2186
+
2187
+
2188
+ ascii.__doc__ = pysparkfuncs.ascii.__doc__
2189
+
2190
+
2191
+ def base64(col: "ColumnOrName") -> Column:
2192
+ return _invoke_function_over_columns("base64", col)
2193
+
2194
+
2195
+ base64.__doc__ = pysparkfuncs.base64.__doc__
2196
+
2197
+
2198
+ def unbase64(col: "ColumnOrName") -> Column:
2199
+ return _invoke_function_over_columns("unbase64", col)
2200
+
2201
+
2202
+ unbase64.__doc__ = pysparkfuncs.unbase64.__doc__
2203
+
2204
+
2205
+ def ltrim(col: "ColumnOrName") -> Column:
2206
+ return _invoke_function_over_columns("ltrim", col)
2207
+
2208
+
2209
+ ltrim.__doc__ = pysparkfuncs.ltrim.__doc__
2210
+
2211
+
2212
+ def rtrim(col: "ColumnOrName") -> Column:
2213
+ return _invoke_function_over_columns("rtrim", col)
2214
+
2215
+
2216
+ rtrim.__doc__ = pysparkfuncs.rtrim.__doc__
2217
+
2218
+
2219
+ def trim(col: "ColumnOrName") -> Column:
2220
+ return _invoke_function_over_columns("trim", col)
2221
+
2222
+
2223
+ trim.__doc__ = pysparkfuncs.trim.__doc__
2224
+
2225
+
2226
+ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column:
2227
+ return _invoke_function("concat_ws", lit(sep), *[_to_col(c) for c in cols])
2228
+
2229
+
2230
+ concat_ws.__doc__ = pysparkfuncs.concat_ws.__doc__
2231
+
2232
+
2233
+ def decode(col: "ColumnOrName", charset: str) -> Column:
2234
+ return _invoke_function("decode", _to_col(col), lit(charset))
2235
+
2236
+
2237
+ decode.__doc__ = pysparkfuncs.decode.__doc__
2238
+
2239
+
2240
+ def encode(col: "ColumnOrName", charset: str) -> Column:
2241
+ return _invoke_function("encode", _to_col(col), lit(charset))
2242
+
2243
+
2244
+ encode.__doc__ = pysparkfuncs.encode.__doc__
2245
+
2246
+
2247
+ def format_number(col: "ColumnOrName", d: int) -> Column:
2248
+ return _invoke_function("format_number", _to_col(col), lit(d))
2249
+
2250
+
2251
+ format_number.__doc__ = pysparkfuncs.format_number.__doc__
2252
+
2253
+
2254
+ def format_string(format: str, *cols: "ColumnOrName") -> Column:
2255
+ return _invoke_function("format_string", lit(format), *[_to_col(c) for c in cols])
2256
+
2257
+
2258
+ format_string.__doc__ = pysparkfuncs.format_string.__doc__
2259
+
2260
+
2261
+ def instr(str: "ColumnOrName", substr: str) -> Column:
2262
+ return _invoke_function("instr", _to_col(str), lit(substr))
2263
+
2264
+
2265
+ instr.__doc__ = pysparkfuncs.instr.__doc__
2266
+
2267
+
2268
+ def overlay(
2269
+ src: "ColumnOrName",
2270
+ replace: "ColumnOrName",
2271
+ pos: Union["ColumnOrName", int],
2272
+ len: Union["ColumnOrName", int] = -1,
2273
+ ) -> Column:
2274
+ if not isinstance(pos, (int, str, Column)):
2275
+ raise PySparkTypeError(
2276
+ error_class="NOT_COLUMN_OR_INT_OR_STR",
2277
+ message_parameters={"arg_name": "pos", "arg_type": type(pos).__name__},
2278
+ )
2279
+ if len is not None and not isinstance(len, (int, str, Column)):
2280
+ raise PySparkTypeError(
2281
+ error_class="NOT_COLUMN_OR_INT_OR_STR",
2282
+ message_parameters={"arg_name": "len", "arg_type": type(len).__name__},
2283
+ )
2284
+
2285
+ if isinstance(pos, int):
2286
+ pos = lit(pos)
2287
+ if isinstance(len, int):
2288
+ len = lit(len)
2289
+
2290
+ return _invoke_function_over_columns("overlay", src, replace, pos, len)
2291
+
2292
+
2293
+ overlay.__doc__ = pysparkfuncs.overlay.__doc__
2294
+
2295
+
2296
+ def sentences(
2297
+ string: "ColumnOrName",
2298
+ language: Optional["ColumnOrName"] = None,
2299
+ country: Optional["ColumnOrName"] = None,
2300
+ ) -> Column:
2301
+ _language = lit("") if language is None else _to_col(language)
2302
+ _country = lit("") if country is None else _to_col(country)
2303
+
2304
+ return _invoke_function("sentences", _to_col(string), _language, _country)
2305
+
2306
+
2307
+ sentences.__doc__ = pysparkfuncs.sentences.__doc__
2308
+
2309
+
2310
+ def substring(str: "ColumnOrName", pos: int, len: int) -> Column:
2311
+ return _invoke_function("substring", _to_col(str), lit(pos), lit(len))
2312
+
2313
+
2314
+ substring.__doc__ = pysparkfuncs.substring.__doc__
2315
+
2316
+
2317
+ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column:
2318
+ return _invoke_function("substring_index", _to_col(str), lit(delim), lit(count))
2319
+
2320
+
2321
+ substring_index.__doc__ = pysparkfuncs.substring_index.__doc__
2322
+
2323
+
2324
+ def levenshtein(
2325
+ left: "ColumnOrName", right: "ColumnOrName", threshold: Optional[int] = None
2326
+ ) -> Column:
2327
+ if threshold is None:
2328
+ return _invoke_function_over_columns("levenshtein", left, right)
2329
+ else:
2330
+ return _invoke_function("levenshtein", _to_col(left), _to_col(right), lit(threshold))
2331
+
2332
+
2333
+ levenshtein.__doc__ = pysparkfuncs.levenshtein.__doc__
2334
+
2335
+
2336
+ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column:
2337
+ return _invoke_function("locate", lit(substr), _to_col(str), lit(pos))
2338
+
2339
+
2340
+ locate.__doc__ = pysparkfuncs.locate.__doc__
2341
+
2342
+
2343
+ def lpad(col: "ColumnOrName", len: int, pad: str) -> Column:
2344
+ return _invoke_function("lpad", _to_col(col), lit(len), lit(pad))
2345
+
2346
+
2347
+ lpad.__doc__ = pysparkfuncs.lpad.__doc__
2348
+
2349
+
2350
+ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column:
2351
+ return _invoke_function("rpad", _to_col(col), lit(len), lit(pad))
2352
+
2353
+
2354
+ rpad.__doc__ = pysparkfuncs.rpad.__doc__
2355
+
2356
+
2357
+ def repeat(col: "ColumnOrName", n: int) -> Column:
2358
+ return _invoke_function("repeat", _to_col(col), lit(n))
2359
+
2360
+
2361
+ repeat.__doc__ = pysparkfuncs.repeat.__doc__
2362
+
2363
+
2364
+ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column:
2365
+ return _invoke_function("split", _to_col(str), lit(pattern), lit(limit))
2366
+
2367
+
2368
+ split.__doc__ = pysparkfuncs.split.__doc__
2369
+
2370
+
2371
+ def rlike(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
2372
+ return _invoke_function_over_columns("rlike", str, regexp)
2373
+
2374
+
2375
+ rlike.__doc__ = pysparkfuncs.rlike.__doc__
2376
+
2377
+
2378
+ def regexp(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
2379
+ return _invoke_function_over_columns("regexp", str, regexp)
2380
+
2381
+
2382
+ regexp.__doc__ = pysparkfuncs.regexp.__doc__
2383
+
2384
+
2385
+ def regexp_like(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
2386
+ return _invoke_function_over_columns("regexp_like", str, regexp)
2387
+
2388
+
2389
+ regexp_like.__doc__ = pysparkfuncs.regexp_like.__doc__
2390
+
2391
+
2392
+ def regexp_count(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
2393
+ return _invoke_function_over_columns("regexp_count", str, regexp)
2394
+
2395
+
2396
+ regexp_count.__doc__ = pysparkfuncs.regexp_count.__doc__
2397
+
2398
+
2399
+ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column:
2400
+ return _invoke_function("regexp_extract", _to_col(str), lit(pattern), lit(idx))
2401
+
2402
+
2403
+ regexp_extract.__doc__ = pysparkfuncs.regexp_extract.__doc__
2404
+
2405
+
2406
+ def regexp_extract_all(
2407
+ str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None
2408
+ ) -> Column:
2409
+ if idx is None:
2410
+ return _invoke_function_over_columns("regexp_extract_all", str, regexp)
2411
+ else:
2412
+ if isinstance(idx, int):
2413
+ idx = lit(idx)
2414
+ return _invoke_function_over_columns("regexp_extract_all", str, regexp, idx)
2415
+
2416
+
2417
+ regexp_extract_all.__doc__ = pysparkfuncs.regexp_extract_all.__doc__
2418
+
2419
+
2420
+ def regexp_replace(
2421
+ string: "ColumnOrName", pattern: Union[str, Column], replacement: Union[str, Column]
2422
+ ) -> Column:
2423
+ if isinstance(pattern, str):
2424
+ pattern = lit(pattern)
2425
+
2426
+ if isinstance(replacement, str):
2427
+ replacement = lit(replacement)
2428
+
2429
+ return _invoke_function("regexp_replace", _to_col(string), pattern, replacement)
2430
+
2431
+
2432
+ regexp_replace.__doc__ = pysparkfuncs.regexp_replace.__doc__
2433
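+
+ # Usage sketch (illustrative). Pattern and replacement may be plain strings
+ # (wrapped as literals) or Columns, which allows per-row patterns:
+ #
+ #     df.select(regexp_replace(df.s, r"\d+", "#"))
+ #     df.select(regexp_replace(df.s, df.pat, df.repl))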
+
2434
+
2435
+ def regexp_substr(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
2436
+ return _invoke_function_over_columns("regexp_substr", str, regexp)
2437
+
2438
+
2439
+ regexp_substr.__doc__ = pysparkfuncs.regexp_substr.__doc__
2440
+
2441
+
2442
+ def regexp_instr(
2443
+ str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None
2444
+ ) -> Column:
2445
+ if idx is None:
2446
+ return _invoke_function_over_columns("regexp_instr", str, regexp)
2447
+ else:
2448
+ if isinstance(idx, int):
2449
+ idx = lit(idx)
2450
+ return _invoke_function_over_columns("regexp_instr", str, regexp, idx)
2451
+
2452
+
2453
+ regexp_instr.__doc__ = pysparkfuncs.regexp_instr.__doc__
2454
+
2455
+
2456
+ def initcap(col: "ColumnOrName") -> Column:
2457
+ return _invoke_function_over_columns("initcap", col)
2458
+
2459
+
2460
+ initcap.__doc__ = pysparkfuncs.initcap.__doc__
2461
+
2462
+
2463
+ def soundex(col: "ColumnOrName") -> Column:
2464
+ return _invoke_function_over_columns("soundex", col)
2465
+
2466
+
2467
+ soundex.__doc__ = pysparkfuncs.soundex.__doc__
2468
+
2469
+
2470
+ def length(col: "ColumnOrName") -> Column:
2471
+ return _invoke_function_over_columns("length", col)
2472
+
2473
+
2474
+ length.__doc__ = pysparkfuncs.length.__doc__
2475
+
2476
+
2477
+ def octet_length(col: "ColumnOrName") -> Column:
2478
+ return _invoke_function_over_columns("octet_length", col)
2479
+
2480
+
2481
+ octet_length.__doc__ = pysparkfuncs.octet_length.__doc__
2482
+
2483
+
2484
+ def bit_length(col: "ColumnOrName") -> Column:
2485
+ return _invoke_function_over_columns("bit_length", col)
2486
+
2487
+
2488
+ bit_length.__doc__ = pysparkfuncs.bit_length.__doc__
2489
+
2490
+
2491
+ def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column:
2492
+ return _invoke_function("translate", _to_col(srcCol), lit(matching), lit(replace))
2493
+
2494
+
2495
+ translate.__doc__ = pysparkfuncs.translate.__doc__
2496
+
2497
+
2498
+ def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
2499
+ if format is not None:
2500
+ return _invoke_function_over_columns("to_binary", col, format)
2501
+ else:
2502
+ return _invoke_function_over_columns("to_binary", col)
2503
+
2504
+
2505
+ to_binary.__doc__ = pysparkfuncs.to_binary.__doc__
2506
+
2507
+
2508
+ def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column:
2509
+ return _invoke_function_over_columns("to_char", col, format)
2510
+
2511
+
2512
+ to_char.__doc__ = pysparkfuncs.to_char.__doc__
2513
+
2514
+
2515
+ def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column:
2516
+ return _invoke_function_over_columns("to_varchar", col, format)
2517
+
2518
+
2519
+ to_varchar.__doc__ = pysparkfuncs.to_varchar.__doc__
2520
+
2521
+
2522
+ def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
2523
+ return _invoke_function_over_columns("to_number", col, format)
2524
+
2525
+
2526
+ to_number.__doc__ = pysparkfuncs.to_number.__doc__
2527
+
2528
+
2529
+ def replace(
2530
+ src: "ColumnOrName", search: "ColumnOrName", replace: Optional["ColumnOrName"] = None
2531
+ ) -> Column:
2532
+ if replace is not None:
2533
+ return _invoke_function_over_columns("replace", src, search, replace)
2534
+ else:
2535
+ return _invoke_function_over_columns("replace", src, search)
2536
+
2537
+
2538
+ replace.__doc__ = pysparkfuncs.replace.__doc__
2539
+
2540
+
2541
+ def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnOrName") -> Column:
2542
+ return _invoke_function_over_columns("split_part", src, delimiter, partNum)
2543
+
2544
+
2545
+ split_part.__doc__ = pysparkfuncs.split_part.__doc__
2546
+
2547
+
2548
+ def substr(
2549
+ str: "ColumnOrName", pos: "ColumnOrName", len: Optional["ColumnOrName"] = None
2550
+ ) -> Column:
2551
+ if len is not None:
2552
+ return _invoke_function_over_columns("substr", str, pos, len)
2553
+ else:
2554
+ return _invoke_function_over_columns("substr", str, pos)
2555
+
2556
+
2557
+ substr.__doc__ = pysparkfuncs.substr.__doc__
2558
+
2559
+
2560
+ def parse_url(
2561
+ url: "ColumnOrName", partToExtract: "ColumnOrName", key: Optional["ColumnOrName"] = None
2562
+ ) -> Column:
2563
+ if key is not None:
2564
+ return _invoke_function_over_columns("parse_url", url, partToExtract, key)
2565
+ else:
2566
+ return _invoke_function_over_columns("parse_url", url, partToExtract)
2567
+
2568
+
2569
+ parse_url.__doc__ = pysparkfuncs.parse_url.__doc__
2570
+
2571
+
2572
+ def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column:
2573
+ return _invoke_function("printf", _to_col(format), *[_to_col(c) for c in cols])
2574
+
2575
+
2576
+ printf.__doc__ = pysparkfuncs.printf.__doc__
2577
+
2578
+
2579
+ def url_decode(str: "ColumnOrName") -> Column:
2580
+ return _invoke_function_over_columns("url_decode", str)
2581
+
2582
+
2583
+ url_decode.__doc__ = pysparkfuncs.url_decode.__doc__
2584
+
2585
+
2586
+ def url_encode(str: "ColumnOrName") -> Column:
2587
+ return _invoke_function_over_columns("url_encode", str)
2588
+
2589
+
2590
+ url_encode.__doc__ = pysparkfuncs.url_encode.__doc__
2591
+
2592
+
2593
+ def position(
2594
+ substr: "ColumnOrName", str: "ColumnOrName", start: Optional["ColumnOrName"] = None
2595
+ ) -> Column:
2596
+ if start is not None:
2597
+ return _invoke_function_over_columns("position", substr, str, start)
2598
+ else:
2599
+ return _invoke_function_over_columns("position", substr, str)
2600
+
2601
+
2602
+ position.__doc__ = pysparkfuncs.position.__doc__
2603
+
2604
+
2605
+ def endswith(str: "ColumnOrName", suffix: "ColumnOrName") -> Column:
2606
+ return _invoke_function_over_columns("endswith", str, suffix)
2607
+
2608
+
2609
+ endswith.__doc__ = pysparkfuncs.endswith.__doc__
2610
+
2611
+
2612
+ def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column:
2613
+ return _invoke_function_over_columns("startswith", str, prefix)
2614
+
2615
+
2616
+ startswith.__doc__ = pysparkfuncs.startswith.__doc__
2617
+
2618
+
2619
+ def char(col: "ColumnOrName") -> Column:
2620
+ return _invoke_function_over_columns("char", col)
2621
+
2622
+
2623
+ char.__doc__ = pysparkfuncs.char.__doc__
2624
+
2625
+
2626
+ def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
2627
+ if format is not None:
2628
+ return _invoke_function_over_columns("try_to_binary", col, format)
2629
+ else:
2630
+ return _invoke_function_over_columns("try_to_binary", col)
2631
+
2632
+
2633
+ try_to_binary.__doc__ = pysparkfuncs.try_to_binary.__doc__
2634
+
2635
+
2636
+ def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
2637
+ return _invoke_function_over_columns("try_to_number", col, format)
2638
+
2639
+
2640
+ try_to_number.__doc__ = pysparkfuncs.try_to_number.__doc__
2641
+
2642
+
2643
+ def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
2644
+ if trim is not None:
2645
+ return _invoke_function_over_columns("btrim", str, trim)
2646
+ else:
2647
+ return _invoke_function_over_columns("btrim", str)
2648
+
2649
+
2650
+ btrim.__doc__ = pysparkfuncs.btrim.__doc__
2651
+
2652
+
2653
+ def char_length(str: "ColumnOrName") -> Column:
2654
+ return _invoke_function_over_columns("char_length", str)
2655
+
2656
+
2657
+ char_length.__doc__ = pysparkfuncs.char_length.__doc__
2658
+
2659
+
2660
+ def character_length(str: "ColumnOrName") -> Column:
2661
+ return _invoke_function_over_columns("character_length", str)
2662
+
2663
+
2664
+ character_length.__doc__ = pysparkfuncs.character_length.__doc__
2665
+
2666
+
2667
+ def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
2668
+ return _invoke_function_over_columns("contains", left, right)
2669
+
2670
+
2671
+ contains.__doc__ = pysparkfuncs.contains.__doc__
2672
+
2673
+
2674
+ def elt(*inputs: "ColumnOrName") -> Column:
2675
+ return _invoke_function("elt", *[_to_col(input) for input in inputs])
2676
+
2677
+
2678
+ elt.__doc__ = pysparkfuncs.elt.__doc__
2679
+
2680
+
2681
+ def find_in_set(str: "ColumnOrName", str_array: "ColumnOrName") -> Column:
2682
+ return _invoke_function_over_columns("find_in_set", str, str_array)
2683
+
2684
+
2685
+ find_in_set.__doc__ = pysparkfuncs.find_in_set.__doc__
2686
+
2687
+
2688
+ def like(
2689
+ str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
2690
+ ) -> Column:
2691
+ if escapeChar is not None:
2692
+ return _invoke_function_over_columns("like", str, pattern, escapeChar)
2693
+ else:
2694
+ return _invoke_function_over_columns("like", str, pattern)
2695
+
2696
+
2697
+ like.__doc__ = pysparkfuncs.like.__doc__
2698
+
2699
+
2700
+ def ilike(
2701
+ str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
2702
+ ) -> Column:
2703
+ if escapeChar is not None:
2704
+ return _invoke_function_over_columns("ilike", str, pattern, escapeChar)
2705
+ else:
2706
+ return _invoke_function_over_columns("ilike", str, pattern)
2707
+
2708
+
2709
+ ilike.__doc__ = pysparkfuncs.ilike.__doc__
2710
+
2711
+
2712
+ def lcase(str: "ColumnOrName") -> Column:
2713
+ return _invoke_function_over_columns("lcase", str)
2714
+
2715
+
2716
+ lcase.__doc__ = pysparkfuncs.lcase.__doc__
2717
+
2718
+
2719
+ def ucase(str: "ColumnOrName") -> Column:
2720
+ return _invoke_function_over_columns("ucase", str)
2721
+
2722
+
2723
+ ucase.__doc__ = pysparkfuncs.ucase.__doc__
2724
+
2725
+
2726
+ def left(str: "ColumnOrName", len: "ColumnOrName") -> Column:
2727
+ return _invoke_function_over_columns("left", str, len)
2728
+
2729
+
2730
+ left.__doc__ = pysparkfuncs.left.__doc__
2731
+
2732
+
2733
+ def right(str: "ColumnOrName", len: "ColumnOrName") -> Column:
2734
+ return _invoke_function_over_columns("right", str, len)
2735
+
2736
+
2737
+ right.__doc__ = pysparkfuncs.right.__doc__
2738
+
2739
+
2740
+ def mask(
2741
+ col: "ColumnOrName",
2742
+ upperChar: Optional["ColumnOrName"] = None,
2743
+ lowerChar: Optional["ColumnOrName"] = None,
2744
+ digitChar: Optional["ColumnOrName"] = None,
2745
+ otherChar: Optional["ColumnOrName"] = None,
2746
+ ) -> Column:
2747
+ _upperChar = lit("X") if upperChar is None else upperChar
2748
+ _lowerChar = lit("x") if lowerChar is None else lowerChar
2749
+ _digitChar = lit("n") if digitChar is None else digitChar
2750
+ _otherChar = lit(None) if otherChar is None else otherChar
2751
+
2752
+ return _invoke_function_over_columns(
2753
+ "mask", col, _upperChar, _lowerChar, _digitChar, _otherChar
2754
+ )
2755
+
2756
+
2757
+ mask.__doc__ = pysparkfuncs.mask.__doc__
2758
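+
+ # Usage sketch (illustrative). The defaults replace upper-case letters with
+ # "X", lower-case with "x", and digits with "n"; overrides must be literal
+ # Columns, since bare strings would be read as column names:
+ #
+ #     df.select(mask(df.ssn))
+ #     df.select(mask(df.ssn, otherChar=lit("*")))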
+
2759
+
2760
+ # Date/Timestamp functions
2761
+ # TODO(SPARK-41455): Resolve dtypes inconsistencies for:
2762
+ # to_timestamp, from_utc_timestamp, to_utc_timestamp,
2763
+ # timestamp_seconds, current_timestamp, date_trunc
2764
+
2765
+
2766
+ def curdate() -> Column:
2767
+ return _invoke_function("curdate")
2768
+
2769
+
2770
+ curdate.__doc__ = pysparkfuncs.curdate.__doc__
2771
+
2772
+
2773
+ def current_date() -> Column:
2774
+ return _invoke_function("current_date")
2775
+
2776
+
2777
+ current_date.__doc__ = pysparkfuncs.current_date.__doc__
2778
+
2779
+
2780
+ def current_timestamp() -> Column:
2781
+ return _invoke_function("current_timestamp")
2782
+
2783
+
2784
+ current_timestamp.__doc__ = pysparkfuncs.current_timestamp.__doc__
2785
+
2786
+
2787
+ def now() -> Column:
2788
+ return _invoke_function("current_timestamp")
2789
+
2790
+
2791
+ now.__doc__ = pysparkfuncs.now.__doc__
2792
+
2793
+
2794
+ def current_timezone() -> Column:
2795
+ return _invoke_function("current_timezone")
2796
+
2797
+
2798
+ current_timezone.__doc__ = pysparkfuncs.current_timezone.__doc__
2799
+
2800
+
2801
+ def localtimestamp() -> Column:
2802
+ return _invoke_function("localtimestamp")
2803
+
2804
+
2805
+ localtimestamp.__doc__ = pysparkfuncs.localtimestamp.__doc__
2806
+
2807
+
2808
+ def date_format(date: "ColumnOrName", format: str) -> Column:
2809
+ return _invoke_function("date_format", _to_col(date), lit(format))
2810
+
2811
+
2812
+ date_format.__doc__ = pysparkfuncs.date_format.__doc__
2813
+
2814
+
2815
+ def year(col: "ColumnOrName") -> Column:
2816
+ return _invoke_function_over_columns("year", col)
2817
+
2818
+
2819
+ year.__doc__ = pysparkfuncs.year.__doc__
2820
+
2821
+
2822
+ def quarter(col: "ColumnOrName") -> Column:
2823
+ return _invoke_function_over_columns("quarter", col)
2824
+
2825
+
2826
+ quarter.__doc__ = pysparkfuncs.quarter.__doc__
2827
+
2828
+
2829
+ def month(col: "ColumnOrName") -> Column:
2830
+ return _invoke_function_over_columns("month", col)
2831
+
2832
+
2833
+ month.__doc__ = pysparkfuncs.month.__doc__
2834
+
2835
+
2836
+ def dayofweek(col: "ColumnOrName") -> Column:
2837
+ return _invoke_function_over_columns("dayofweek", col)
2838
+
2839
+
2840
+ dayofweek.__doc__ = pysparkfuncs.dayofweek.__doc__
2841
+
2842
+
2843
+ def dayofmonth(col: "ColumnOrName") -> Column:
2844
+ return _invoke_function_over_columns("dayofmonth", col)
2845
+
2846
+
2847
+ dayofmonth.__doc__ = pysparkfuncs.dayofmonth.__doc__
2848
+
2849
+
2850
+ def day(col: "ColumnOrName") -> Column:
2851
+ return _invoke_function_over_columns("day", col)
2852
+
2853
+
2854
+ day.__doc__ = pysparkfuncs.day.__doc__
2855
+
2856
+
2857
+ def dayofyear(col: "ColumnOrName") -> Column:
2858
+ return _invoke_function_over_columns("dayofyear", col)
2859
+
2860
+
2861
+ dayofyear.__doc__ = pysparkfuncs.dayofyear.__doc__
2862
+
2863
+
2864
+ def hour(col: "ColumnOrName") -> Column:
2865
+ return _invoke_function_over_columns("hour", col)
2866
+
2867
+
2868
+ hour.__doc__ = pysparkfuncs.hour.__doc__
2869
+
2870
+
2871
+ def minute(col: "ColumnOrName") -> Column:
2872
+ return _invoke_function_over_columns("minute", col)
2873
+
2874
+
2875
+ minute.__doc__ = pysparkfuncs.minute.__doc__
2876
+
2877
+
2878
+ def second(col: "ColumnOrName") -> Column:
2879
+ return _invoke_function_over_columns("second", col)
2880
+
2881
+
2882
+ second.__doc__ = pysparkfuncs.second.__doc__
2883
+
2884
+
2885
+ def weekofyear(col: "ColumnOrName") -> Column:
2886
+ return _invoke_function_over_columns("weekofyear", col)
2887
+
2888
+
2889
+ weekofyear.__doc__ = pysparkfuncs.weekofyear.__doc__
2890
+
2891
+
2892
+ def weekday(col: "ColumnOrName") -> Column:
2893
+ return _invoke_function_over_columns("weekday", col)
2894
+
2895
+
2896
+ weekday.__doc__ = pysparkfuncs.weekday.__doc__
2897
+
2898
+
2899
+ def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
2900
+ return _invoke_function_over_columns("extract", field, source)
2901
+
2902
+
2903
+ extract.__doc__ = pysparkfuncs.extract.__doc__
2904
+
2905
+
2906
+ def date_part(field: "ColumnOrName", source: "ColumnOrName") -> Column:
2907
+ return _invoke_function_over_columns("date_part", field, source)
2908
+
2909
+
2910
+ extract.__doc__ = pysparkfuncs.extract.__doc__
2911
+
2912
+
2913
+ def datepart(field: "ColumnOrName", source: "ColumnOrName") -> Column:
2914
+ return _invoke_function_over_columns("datepart", field, source)
2915
+
2916
+
2917
+ extract.__doc__ = pysparkfuncs.extract.__doc__
2918
+
2919
+
2920
+ def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") -> Column:
2921
+ return _invoke_function_over_columns("make_date", year, month, day)
2922
+
2923
+
2924
+ make_date.__doc__ = pysparkfuncs.make_date.__doc__
2925
+
2926
+
2927
+ def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
2928
+ days = lit(days) if isinstance(days, int) else days
2929
+ return _invoke_function_over_columns("date_add", start, days)
2930
+
2931
+
2932
+ date_add.__doc__ = pysparkfuncs.date_add.__doc__
2933
+
2934
+
2935
+ def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
2936
+ days = lit(days) if isinstance(days, int) else days
2937
+ return _invoke_function_over_columns("dateadd", start, days)
2938
+
2939
+
2940
+ dateadd.__doc__ = pysparkfuncs.dateadd.__doc__
2941
+
2942
+
2943
+ def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
2944
+ days = lit(days) if isinstance(days, int) else days
2945
+ return _invoke_function_over_columns("date_sub", start, days)
2946
+
2947
+
2948
+ date_sub.__doc__ = pysparkfuncs.date_sub.__doc__
2949
+
2950
+
2951
+ def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
2952
+ return _invoke_function_over_columns("datediff", end, start)
2953
+
2954
+
2955
+ datediff.__doc__ = pysparkfuncs.datediff.__doc__
2956
+
2957
+
2958
+ def date_diff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
2959
+ return _invoke_function_over_columns("date_diff", end, start)
2960
+
2961
+
2962
+ date_diff.__doc__ = pysparkfuncs.date_diff.__doc__
2963
+
2964
+
2965
+ def date_from_unix_date(days: "ColumnOrName") -> Column:
2966
+ return _invoke_function_over_columns("date_from_unix_date", days)
2967
+
2968
+
2969
+ date_from_unix_date.__doc__ = pysparkfuncs.date_from_unix_date.__doc__
2970
+
2971
+
2972
+ def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column:
2973
+ months = lit(months) if isinstance(months, int) else months
2974
+ return _invoke_function_over_columns("add_months", start, months)
2975
+
2976
+
2977
+ add_months.__doc__ = pysparkfuncs.add_months.__doc__
2978
+
2979
+
2980
+ def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool = True) -> Column:
2981
+ return _invoke_function("months_between", _to_col(date1), _to_col(date2), lit(roundOff))
2982
+
2983
+
2984
+ months_between.__doc__ = pysparkfuncs.months_between.__doc__
2985
+
2986
+
2987
+ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
2988
+ if format is None:
2989
+ return _invoke_function_over_columns("to_date", col)
2990
+ else:
2991
+ return _invoke_function("to_date", _to_col(col), lit(format))
2992
+
2993
+
2994
+ to_date.__doc__ = pysparkfuncs.to_date.__doc__
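
Like several wrappers in this module, to_date dispatches on whether a format is given: without one it forwards the bare column, with one it wraps the pattern in lit(). A sketch with a hypothetical string column s:

    from pyspark.sql.functions import col, to_date

    df.select(to_date(col("s")))                # default parser
    df.select(to_date(col("s"), "yyyy-MM-dd"))  # explicit Spark datetime pattern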
+
+
+ def unix_date(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("unix_date", col)
+
+
+ unix_date.__doc__ = pysparkfuncs.unix_date.__doc__
+
+
+ def unix_micros(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("unix_micros", col)
+
+
+ unix_micros.__doc__ = pysparkfuncs.unix_micros.__doc__
+
+
+ def unix_millis(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("unix_millis", col)
+
+
+ unix_millis.__doc__ = pysparkfuncs.unix_millis.__doc__
+
+
+ def unix_seconds(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("unix_seconds", col)
+
+
+ unix_seconds.__doc__ = pysparkfuncs.unix_seconds.__doc__
+
+
+ @overload
+ def to_timestamp(col: "ColumnOrName") -> Column:
+     ...
+
+
+ @overload
+ def to_timestamp(col: "ColumnOrName", format: str) -> Column:
+     ...
+
+
+ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column:
+     if format is None:
+         return _invoke_function_over_columns("to_timestamp", col)
+     else:
+         return _invoke_function("to_timestamp", _to_col(col), lit(format))
+
+
+ to_timestamp.__doc__ = pysparkfuncs.to_timestamp.__doc__
+
+
+ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
+     if format is not None:
+         return _invoke_function_over_columns("try_to_timestamp", col, format)
+     else:
+         return _invoke_function_over_columns("try_to_timestamp", col)
+
+
+ try_to_timestamp.__doc__ = pysparkfuncs.try_to_timestamp.__doc__
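
Note the asymmetry between the two parsers: to_timestamp takes its format as a plain str, while try_to_timestamp types it as a ColumnOrName, so a literal pattern should be wrapped in lit(). A sketch:

    from pyspark.sql.functions import col, lit, to_timestamp, try_to_timestamp

    df.select(to_timestamp(col("s"), "yyyy-MM-dd HH:mm:ss"))           # errors on malformed input
    df.select(try_to_timestamp(col("s"), lit("yyyy-MM-dd HH:mm:ss")))  # yields NULL instead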
+
+
+ def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath", xml, path)
+
+
+ xpath.__doc__ = pysparkfuncs.xpath.__doc__
+
+
+ def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_boolean", xml, path)
+
+
+ xpath_boolean.__doc__ = pysparkfuncs.xpath_boolean.__doc__
+
+
+ def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_double", xml, path)
+
+
+ xpath_double.__doc__ = pysparkfuncs.xpath_double.__doc__
+
+
+ def xpath_number(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_number", xml, path)
+
+
+ xpath_number.__doc__ = pysparkfuncs.xpath_number.__doc__
+
+
+ def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_float", xml, path)
+
+
+ xpath_float.__doc__ = pysparkfuncs.xpath_float.__doc__
+
+
+ def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_int", xml, path)
+
+
+ xpath_int.__doc__ = pysparkfuncs.xpath_int.__doc__
+
+
+ def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_long", xml, path)
+
+
+ xpath_long.__doc__ = pysparkfuncs.xpath_long.__doc__
+
+
+ def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_short", xml, path)
+
+
+ xpath_short.__doc__ = pysparkfuncs.xpath_short.__doc__
+
+
+ def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xpath_string", xml, path)
+
+
+ xpath_string.__doc__ = pysparkfuncs.xpath_string.__doc__
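
Because every xpath variant routes both arguments through _invoke_function_over_columns, a bare string path would be read as a column name; literal XPath expressions need lit(). A sketch against a hypothetical XML column:

    from pyspark.sql.functions import lit, xpath, xpath_int

    df = spark.createDataFrame([("<a><b>1</b><b>2</b></a>",)], ["x"])
    df.select(xpath(df.x, lit("a/b/text()")))    # -> ["1", "2"]
    df.select(xpath_int(df.x, lit("sum(a/b)")))  # -> 3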
+
+
+ def trunc(date: "ColumnOrName", format: str) -> Column:
+     return _invoke_function("trunc", _to_col(date), lit(format))
+
+
+ trunc.__doc__ = pysparkfuncs.trunc.__doc__
+
+
+ def date_trunc(format: str, timestamp: "ColumnOrName") -> Column:
+     return _invoke_function("date_trunc", lit(format), _to_col(timestamp))
+
+
+ date_trunc.__doc__ = pysparkfuncs.date_trunc.__doc__
+
+
+ def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column:
+     return _invoke_function("next_day", _to_col(date), lit(dayOfWeek))
+
+
+ next_day.__doc__ = pysparkfuncs.next_day.__doc__
+
+
+ def last_day(date: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("last_day", date)
+
+
+ last_day.__doc__ = pysparkfuncs.last_day.__doc__
+
+
+ def from_unixtime(timestamp: "ColumnOrName", format: str = "yyyy-MM-dd HH:mm:ss") -> Column:
+     return _invoke_function("from_unixtime", _to_col(timestamp), lit(format))
+
+
+ from_unixtime.__doc__ = pysparkfuncs.from_unixtime.__doc__
+
+
+ @overload
+ def unix_timestamp(timestamp: "ColumnOrName", format: str = ...) -> Column:
+     ...
+
+
+ @overload
+ def unix_timestamp() -> Column:
+     ...
+
+
+ def unix_timestamp(
+     timestamp: Optional["ColumnOrName"] = None, format: str = "yyyy-MM-dd HH:mm:ss"
+ ) -> Column:
+     if timestamp is None:
+         return _invoke_function("unix_timestamp")
+     return _invoke_function("unix_timestamp", _to_col(timestamp), lit(format))
+
+
+ unix_timestamp.__doc__ = pysparkfuncs.unix_timestamp.__doc__
+
+
+ def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column:
+     if isinstance(tz, str):
+         tz = lit(tz)
+     return _invoke_function_over_columns("from_utc_timestamp", timestamp, tz)
+
+
+ from_utc_timestamp.__doc__ = pysparkfuncs.from_utc_timestamp.__doc__
+
+
+ def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column:
+     if isinstance(tz, str):
+         tz = lit(tz)
+     return _invoke_function_over_columns("to_utc_timestamp", timestamp, tz)
+
+
+ to_utc_timestamp.__doc__ = pysparkfuncs.to_utc_timestamp.__doc__
+
+
+ def timestamp_seconds(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("timestamp_seconds", col)
+
+
+ timestamp_seconds.__doc__ = pysparkfuncs.timestamp_seconds.__doc__
+
+
+ def timestamp_millis(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("timestamp_millis", col)
+
+
+ timestamp_millis.__doc__ = pysparkfuncs.timestamp_millis.__doc__
+
+
+ def timestamp_micros(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("timestamp_micros", col)
+
+
+ timestamp_micros.__doc__ = pysparkfuncs.timestamp_micros.__doc__
+
+
+ def window(
+     timeColumn: "ColumnOrName",
+     windowDuration: str,
+     slideDuration: Optional[str] = None,
+     startTime: Optional[str] = None,
+ ) -> Column:
+     if windowDuration is None or not isinstance(windowDuration, str):
+         raise PySparkTypeError(
+             error_class="NOT_STR",
+             message_parameters={
+                 "arg_name": "windowDuration",
+                 "arg_type": type(windowDuration).__name__,
+             },
+         )
+     if slideDuration is not None and not isinstance(slideDuration, str):
+         raise PySparkTypeError(
+             error_class="NOT_STR",
+             message_parameters={
+                 "arg_name": "slideDuration",
+                 "arg_type": type(slideDuration).__name__,
+             },
+         )
+     if startTime is not None and not isinstance(startTime, str):
+         raise PySparkTypeError(
+             error_class="NOT_STR",
+             message_parameters={"arg_name": "startTime", "arg_type": type(startTime).__name__},
+         )
+
+     time_col = _to_col(timeColumn)
+
+     if slideDuration is not None and startTime is not None:
+         return _invoke_function(
+             "window", time_col, lit(windowDuration), lit(slideDuration), lit(startTime)
+         )
+     elif slideDuration is not None:
+         return _invoke_function("window", time_col, lit(windowDuration), lit(slideDuration))
+     elif startTime is not None:
+         return _invoke_function(
+             "window", time_col, lit(windowDuration), lit(windowDuration), lit(startTime)
+         )
+     else:
+         return _invoke_function("window", time_col, lit(windowDuration))
+
+
+ window.__doc__ = pysparkfuncs.window.__doc__
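
The branch order above encodes the defaulting rule: when only startTime is supplied, the slide duration falls back to windowDuration, i.e. a tumbling window shifted by the offset, which is why lit(windowDuration) is passed twice in that case. A usage sketch with a hypothetical timestamp column ts:

    from pyspark.sql.functions import col, window

    df.groupBy(window(col("ts"), "10 minutes")).count()                          # tumbling
    df.groupBy(window(col("ts"), "10 minutes", "5 minutes")).count()             # sliding
    df.groupBy(window(col("ts"), "10 minutes", startTime="2 minutes")).count()   # tumbling, offset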
+
+
+ def window_time(
+     windowColumn: "ColumnOrName",
+ ) -> Column:
+     return _invoke_function("window_time", _to_col(windowColumn))
+
+
+ window_time.__doc__ = pysparkfuncs.window_time.__doc__
+
+
+ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) -> Column:
+     if gapDuration is None or not isinstance(gapDuration, (Column, str)):
+         raise PySparkTypeError(
+             error_class="NOT_COLUMN_OR_STR",
+             message_parameters={"arg_name": "gapDuration", "arg_type": type(gapDuration).__name__},
+         )
+
+     time_col = _to_col(timeColumn)
+
+     if isinstance(gapDuration, Column):
+         return _invoke_function("session_window", time_col, gapDuration)
+     else:
+         return _invoke_function("session_window", time_col, lit(gapDuration))
+
+
+ session_window.__doc__ = pysparkfuncs.session_window.__doc__
+
+
+ def to_unix_timestamp(
+     timestamp: "ColumnOrName",
+     format: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     if format is not None:
+         return _invoke_function_over_columns("to_unix_timestamp", timestamp, format)
+     else:
+         return _invoke_function_over_columns("to_unix_timestamp", timestamp)
+
+
+ to_unix_timestamp.__doc__ = pysparkfuncs.to_unix_timestamp.__doc__
+
+
+ def to_timestamp_ltz(
+     timestamp: "ColumnOrName",
+     format: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     if format is not None:
+         return _invoke_function_over_columns("to_timestamp_ltz", timestamp, format)
+     else:
+         return _invoke_function_over_columns("to_timestamp_ltz", timestamp)
+
+
+ to_timestamp_ltz.__doc__ = pysparkfuncs.to_timestamp_ltz.__doc__
+
+
+ def to_timestamp_ntz(
+     timestamp: "ColumnOrName",
+     format: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     if format is not None:
+         return _invoke_function_over_columns("to_timestamp_ntz", timestamp, format)
+     else:
+         return _invoke_function_over_columns("to_timestamp_ntz", timestamp)
+
+
+ to_timestamp_ntz.__doc__ = pysparkfuncs.to_timestamp_ntz.__doc__
+
+
+ # Partition Transformation Functions
+
+
+ def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column:
+     if isinstance(numBuckets, int):
+         _numBuckets = lit(numBuckets)
+     elif isinstance(numBuckets, Column):
+         _numBuckets = numBuckets
+     else:
+         raise PySparkTypeError(
+             error_class="NOT_COLUMN_OR_INT",
+             message_parameters={
+                 "arg_name": "numBuckets",
+                 "arg_type": type(numBuckets).__name__,
+             },
+         )
+
+     return _invoke_function("bucket", _numBuckets, _to_col(col))
+
+
+ bucket.__doc__ = pysparkfuncs.bucket.__doc__
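
bucket and the years/months/days/hours transforms that follow are partition transforms rather than row-level functions; they are intended for DataFrameWriterV2.partitionedBy. A sketch with a hypothetical catalog table name:

    from pyspark.sql.functions import bucket, years, col

    (df.writeTo("catalog.db.events")
       .partitionedBy(years(col("ts")), bucket(16, col("user_id")))
       .createOrReplace())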
+
+
+ def years(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("years", col)
+
+
+ years.__doc__ = pysparkfuncs.years.__doc__
+
+
+ def months(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("months", col)
+
+
+ months.__doc__ = pysparkfuncs.months.__doc__
+
+
+ def days(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("days", col)
+
+
+ days.__doc__ = pysparkfuncs.days.__doc__
+
+
+ def hours(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("hours", col)
+
+
+ hours.__doc__ = pysparkfuncs.hours.__doc__
+
+
+ def convert_timezone(
+     sourceTz: Optional[Column], targetTz: Column, sourceTs: "ColumnOrName"
+ ) -> Column:
+     if sourceTz is None:
+         return _invoke_function_over_columns("convert_timezone", targetTz, sourceTs)
+     else:
+         return _invoke_function_over_columns("convert_timezone", sourceTz, targetTz, sourceTs)
+
+
+ convert_timezone.__doc__ = pysparkfuncs.convert_timezone.__doc__
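
When sourceTz is None, only the target zone and the timestamp are forwarded, leaving the source zone to be resolved engine-side (presumably the session time zone). A sketch:

    from pyspark.sql.functions import col, lit, convert_timezone

    df.select(convert_timezone(None, lit("America/Los_Angeles"), col("ts")))
    df.select(convert_timezone(lit("UTC"), lit("Asia/Tokyo"), col("ts")))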
+
+
+ def make_dt_interval(
+     days: Optional["ColumnOrName"] = None,
+     hours: Optional["ColumnOrName"] = None,
+     mins: Optional["ColumnOrName"] = None,
+     secs: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     _days = lit(0) if days is None else _to_col(days)
+     _hours = lit(0) if hours is None else _to_col(hours)
+     _mins = lit(0) if mins is None else _to_col(mins)
+     _secs = lit(decimal.Decimal(0)) if secs is None else _to_col(secs)
+
+     return _invoke_function_over_columns("make_dt_interval", _days, _hours, _mins, _secs)
+
+
+ make_dt_interval.__doc__ = pysparkfuncs.make_dt_interval.__doc__
+
+
+ def make_interval(
+     years: Optional["ColumnOrName"] = None,
+     months: Optional["ColumnOrName"] = None,
+     weeks: Optional["ColumnOrName"] = None,
+     days: Optional["ColumnOrName"] = None,
+     hours: Optional["ColumnOrName"] = None,
+     mins: Optional["ColumnOrName"] = None,
+     secs: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     _years = lit(0) if years is None else _to_col(years)
+     _months = lit(0) if months is None else _to_col(months)
+     _weeks = lit(0) if weeks is None else _to_col(weeks)
+     _days = lit(0) if days is None else _to_col(days)
+     _hours = lit(0) if hours is None else _to_col(hours)
+     _mins = lit(0) if mins is None else _to_col(mins)
+     _secs = lit(decimal.Decimal(0)) if secs is None else _to_col(secs)
+
+     return _invoke_function_over_columns(
+         "make_interval", _years, _months, _weeks, _days, _hours, _mins, _secs
+     )
+
+
+ make_interval.__doc__ = pysparkfuncs.make_interval.__doc__
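
All interval parts default to zero (seconds to a Decimal zero, matching the fractional-seconds type), so intervals can be built from any subset of fields. A sketch:

    from pyspark.sql.functions import lit, make_dt_interval, make_interval

    df.select(make_interval(years=lit(1), months=lit(6)))    # 1 year 6 months
    df.select(make_dt_interval(hours=lit(2), mins=lit(30)))  # 2:30:00 day-time interval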
+
+
+ def make_timestamp(
+     years: "ColumnOrName",
+     months: "ColumnOrName",
+     days: "ColumnOrName",
+     hours: "ColumnOrName",
+     mins: "ColumnOrName",
+     secs: "ColumnOrName",
+     timezone: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     if timezone is not None:
+         return _invoke_function_over_columns(
+             "make_timestamp", years, months, days, hours, mins, secs, timezone
+         )
+     else:
+         return _invoke_function_over_columns(
+             "make_timestamp", years, months, days, hours, mins, secs
+         )
+
+
+ make_timestamp.__doc__ = pysparkfuncs.make_timestamp.__doc__
+
+
+ def make_timestamp_ltz(
+     years: "ColumnOrName",
+     months: "ColumnOrName",
+     days: "ColumnOrName",
+     hours: "ColumnOrName",
+     mins: "ColumnOrName",
+     secs: "ColumnOrName",
+     timezone: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     if timezone is not None:
+         return _invoke_function_over_columns(
+             "make_timestamp_ltz", years, months, days, hours, mins, secs, timezone
+         )
+     else:
+         return _invoke_function_over_columns(
+             "make_timestamp_ltz", years, months, days, hours, mins, secs
+         )
+
+
+ make_timestamp_ltz.__doc__ = pysparkfuncs.make_timestamp_ltz.__doc__
+
+
+ def make_timestamp_ntz(
+     years: "ColumnOrName",
+     months: "ColumnOrName",
+     days: "ColumnOrName",
+     hours: "ColumnOrName",
+     mins: "ColumnOrName",
+     secs: "ColumnOrName",
+ ) -> Column:
+     return _invoke_function_over_columns(
+         "make_timestamp_ntz", years, months, days, hours, mins, secs
+     )
+
+
+ make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__
+
+
+ def make_ym_interval(
+     years: Optional["ColumnOrName"] = None,
+     months: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     _years = lit(0) if years is None else _to_col(years)
+     _months = lit(0) if months is None else _to_col(months)
+     return _invoke_function_over_columns("make_ym_interval", _years, _months)
+
+
+ make_ym_interval.__doc__ = pysparkfuncs.make_ym_interval.__doc__
+
+ # Misc Functions
+
+
+ def current_catalog() -> Column:
+     return _invoke_function("current_catalog")
+
+
+ current_catalog.__doc__ = pysparkfuncs.current_catalog.__doc__
+
+
+ def current_database() -> Column:
+     return _invoke_function("current_database")
+
+
+ current_database.__doc__ = pysparkfuncs.current_database.__doc__
+
+
+ def current_schema() -> Column:
+     return _invoke_function("current_schema")
+
+
+ current_schema.__doc__ = pysparkfuncs.current_schema.__doc__
+
+
+ def current_user() -> Column:
+     return _invoke_function("current_user")
+
+
+ current_user.__doc__ = pysparkfuncs.current_user.__doc__
+
+
+ def user() -> Column:
+     return _invoke_function("user")
+
+
+ user.__doc__ = pysparkfuncs.user.__doc__
+
+
+ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None) -> Column:
+     if errMsg is None:
+         return _invoke_function_over_columns("assert_true", col)
+     if not isinstance(errMsg, (str, Column)):
+         raise PySparkTypeError(
+             error_class="NOT_COLUMN_OR_STR",
+             message_parameters={"arg_name": "errMsg", "arg_type": type(errMsg).__name__},
+         )
+     _err_msg = lit(errMsg) if isinstance(errMsg, str) else _to_col(errMsg)
+     return _invoke_function("assert_true", _to_col(col), _err_msg)
+
+
+ assert_true.__doc__ = pysparkfuncs.assert_true.__doc__
+
+
+ def raise_error(errMsg: Union[Column, str]) -> Column:
+     if not isinstance(errMsg, (str, Column)):
+         raise PySparkTypeError(
+             error_class="NOT_COLUMN_OR_STR",
+             message_parameters={"arg_name": "errMsg", "arg_type": type(errMsg).__name__},
+         )
+     _err_msg = lit(errMsg) if isinstance(errMsg, str) else _to_col(errMsg)
+     return _invoke_function("raise_error", _err_msg)
+
+
+ raise_error.__doc__ = pysparkfuncs.raise_error.__doc__
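
Both helpers accept the message as either a str (wrapped in lit) or a Column; the usual guard pattern looks like this sketch, with hypothetical column names:

    from pyspark.sql.functions import assert_true, col, raise_error, when

    df.select(assert_true(col("amount") >= 0, "amount must be non-negative"))
    df.select(when(col("amount") >= 0, col("amount"))
              .otherwise(raise_error("negative amount")))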
+
+
+ def crc32(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("crc32", col)
+
+
+ crc32.__doc__ = pysparkfuncs.crc32.__doc__
+
+
+ def hash(*cols: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("hash", *cols)
+
+
+ hash.__doc__ = pysparkfuncs.hash.__doc__
+
+
+ def xxhash64(*cols: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("xxhash64", *cols)
+
+
+ xxhash64.__doc__ = pysparkfuncs.xxhash64.__doc__
+
+
+ def md5(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("md5", col)
+
+
+ md5.__doc__ = pysparkfuncs.md5.__doc__
+
+
+ def sha1(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("sha1", col)
+
+
+ sha1.__doc__ = pysparkfuncs.sha1.__doc__
+
+
+ def sha2(col: "ColumnOrName", numBits: int) -> Column:
+     return _invoke_function("sha2", _to_col(col), lit(numBits))
+
+
+ sha2.__doc__ = pysparkfuncs.sha2.__doc__
+
+
+ def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] = None) -> Column:
+     if lgConfigK is None:
+         return _invoke_function_over_columns("hll_sketch_agg", col)
+     else:
+         _lgConfigK = lit(lgConfigK) if isinstance(lgConfigK, int) else lgConfigK
+         return _invoke_function_over_columns("hll_sketch_agg", col, _lgConfigK)
+
+
+ hll_sketch_agg.__doc__ = pysparkfuncs.hll_sketch_agg.__doc__
+
+
+ def hll_union_agg(col: "ColumnOrName", allowDifferentLgConfigK: Optional[bool] = None) -> Column:
+     if allowDifferentLgConfigK is None:
+         return _invoke_function_over_columns("hll_union_agg", col)
+     else:
+         _allowDifferentLgConfigK = (
+             lit(allowDifferentLgConfigK)
+             if isinstance(allowDifferentLgConfigK, bool)
+             else allowDifferentLgConfigK
+         )
+         return _invoke_function_over_columns("hll_union_agg", col, _allowDifferentLgConfigK)
+
+
+ hll_union_agg.__doc__ = pysparkfuncs.hll_union_agg.__doc__
+
+
+ def hll_sketch_estimate(col: "ColumnOrName") -> Column:
+     return _invoke_function("hll_sketch_estimate", _to_col(col))
+
+
+ hll_sketch_estimate.__doc__ = pysparkfuncs.hll_sketch_estimate.__doc__
+
+
+ def hll_union(
+     col1: "ColumnOrName", col2: "ColumnOrName", allowDifferentLgConfigK: Optional[bool] = None
+ ) -> Column:
+     if allowDifferentLgConfigK is not None:
+         return _invoke_function(
+             "hll_union", _to_col(col1), _to_col(col2), lit(allowDifferentLgConfigK)
+         )
+     else:
+         return _invoke_function("hll_union", _to_col(col1), _to_col(col2))
+
+
+ hll_union.__doc__ = pysparkfuncs.hll_union.__doc__
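
The HLL functions compose into the usual sketch-then-estimate flow: hll_sketch_agg builds per-group sketches, hll_union / hll_union_agg merge them, and hll_sketch_estimate reads off the approximate distinct count. A sketch with hypothetical columns:

    from pyspark.sql.functions import col, hll_sketch_agg, hll_sketch_estimate

    sketches = df.groupBy("day").agg(hll_sketch_agg(col("user_id")).alias("sk"))
    sketches.select("day", hll_sketch_estimate(col("sk")).alias("approx_users"))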
+
+
+ # Predicates Function
+
+
+ def ifnull(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("ifnull", col1, col2)
+
+
+ ifnull.__doc__ = pysparkfuncs.ifnull.__doc__
+
+
+ def isnotnull(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("isnotnull", col)
+
+
+ isnotnull.__doc__ = pysparkfuncs.isnotnull.__doc__
+
+
+ def equal_null(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("equal_null", col1, col2)
+
+
+ equal_null.__doc__ = pysparkfuncs.equal_null.__doc__
+
+
+ def nullif(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("nullif", col1, col2)
+
+
+ nullif.__doc__ = pysparkfuncs.nullif.__doc__
+
+
+ def nvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("nvl", col1, col2)
+
+
+ nvl.__doc__ = pysparkfuncs.nvl.__doc__
+
+
+ def nvl2(col1: "ColumnOrName", col2: "ColumnOrName", col3: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("nvl2", col1, col2, col3)
+
+
+ nvl2.__doc__ = pysparkfuncs.nvl2.__doc__
+
+
+ def aes_encrypt(
+     input: "ColumnOrName",
+     key: "ColumnOrName",
+     mode: Optional["ColumnOrName"] = None,
+     padding: Optional["ColumnOrName"] = None,
+     iv: Optional["ColumnOrName"] = None,
+     aad: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     _mode = lit("GCM") if mode is None else _to_col(mode)
+     _padding = lit("DEFAULT") if padding is None else _to_col(padding)
+     _iv = lit("") if iv is None else _to_col(iv)
+     _aad = lit("") if aad is None else _to_col(aad)
+
+     return _invoke_function_over_columns("aes_encrypt", input, key, _mode, _padding, _iv, _aad)
+
+
+ aes_encrypt.__doc__ = pysparkfuncs.aes_encrypt.__doc__
+
+
+ def aes_decrypt(
+     input: "ColumnOrName",
+     key: "ColumnOrName",
+     mode: Optional["ColumnOrName"] = None,
+     padding: Optional["ColumnOrName"] = None,
+     aad: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     _mode = lit("GCM") if mode is None else _to_col(mode)
+     _padding = lit("DEFAULT") if padding is None else _to_col(padding)
+     _aad = lit("") if aad is None else _to_col(aad)
+
+     return _invoke_function_over_columns("aes_decrypt", input, key, _mode, _padding, _aad)
+
+
+ aes_decrypt.__doc__ = pysparkfuncs.aes_decrypt.__doc__
+
+
+ def try_aes_decrypt(
+     input: "ColumnOrName",
+     key: "ColumnOrName",
+     mode: Optional["ColumnOrName"] = None,
+     padding: Optional["ColumnOrName"] = None,
+     aad: Optional["ColumnOrName"] = None,
+ ) -> Column:
+     _mode = lit("GCM") if mode is None else _to_col(mode)
+     _padding = lit("DEFAULT") if padding is None else _to_col(padding)
+     _aad = lit("") if aad is None else _to_col(aad)
+
+     return _invoke_function_over_columns("try_aes_decrypt", input, key, _mode, _padding, _aad)
+
+
+ try_aes_decrypt.__doc__ = pysparkfuncs.try_aes_decrypt.__doc__
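
With the defaults above (GCM mode, DEFAULT padding, empty IV/AAD), an encrypt/decrypt round trip only needs a key of a valid AES length. A sketch with a hypothetical 16-byte key; try_aes_decrypt returns NULL rather than failing on undecryptable input:

    from pyspark.sql.functions import aes_encrypt, try_aes_decrypt, col, lit

    key = lit("0123456789abcdef")  # hypothetical key; AES accepts 16-, 24-, or 32-byte keys
    enc = df.select(aes_encrypt(col("secret"), key).alias("c"))
    enc.select(try_aes_decrypt(col("c"), key))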
+
+
+ def sha(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("sha", col)
+
+
+ sha.__doc__ = pysparkfuncs.sha.__doc__
+
+
+ def input_file_block_length() -> Column:
+     return _invoke_function_over_columns("input_file_block_length")
+
+
+ input_file_block_length.__doc__ = pysparkfuncs.input_file_block_length.__doc__
+
+
+ def input_file_block_start() -> Column:
+     return _invoke_function_over_columns("input_file_block_start")
+
+
+ input_file_block_start.__doc__ = pysparkfuncs.input_file_block_start.__doc__
+
+
+ def reflect(*cols: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("reflect", *cols)
+
+
+ reflect.__doc__ = pysparkfuncs.reflect.__doc__
+
+
+ def java_method(*cols: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("java_method", *cols)
+
+
+ java_method.__doc__ = pysparkfuncs.java_method.__doc__
+
+
+ def version() -> Column:
+     return _invoke_function_over_columns("version")
+
+
+ version.__doc__ = pysparkfuncs.version.__doc__
+
+
+ def typeof(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("typeof", col)
+
+
+ typeof.__doc__ = pysparkfuncs.typeof.__doc__
+
+
+ def stack(*cols: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("stack", *cols)
+
+
+ stack.__doc__ = pysparkfuncs.stack.__doc__
+
+
+ def bitmap_bit_position(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("bitmap_bit_position", col)
+
+
+ bitmap_bit_position.__doc__ = pysparkfuncs.bitmap_bit_position.__doc__
+
+
+ def bitmap_bucket_number(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("bitmap_bucket_number", col)
+
+
+ bitmap_bucket_number.__doc__ = pysparkfuncs.bitmap_bucket_number.__doc__
+
+
+ def bitmap_construct_agg(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("bitmap_construct_agg", col)
+
+
+ bitmap_construct_agg.__doc__ = pysparkfuncs.bitmap_construct_agg.__doc__
+
+
+ def bitmap_count(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("bitmap_count", col)
+
+
+ bitmap_count.__doc__ = pysparkfuncs.bitmap_count.__doc__
+
+
+ def bitmap_or_agg(col: "ColumnOrName") -> Column:
+     return _invoke_function_over_columns("bitmap_or_agg", col)
+
+
+ bitmap_or_agg.__doc__ = pysparkfuncs.bitmap_or_agg.__doc__
+
+
+ # Call Functions
+
+
+ def call_udf(udfName: str, *cols: "ColumnOrName") -> Column:
+     return _invoke_function(udfName, *[_to_col(c) for c in cols])
+
+
+ call_udf.__doc__ = pysparkfuncs.call_udf.__doc__
+
+
+ def unwrap_udt(col: "ColumnOrName") -> Column:
+     return _invoke_function("unwrap_udt", _to_col(col))
+
+
+ unwrap_udt.__doc__ = pysparkfuncs.unwrap_udt.__doc__
+
+
+ def udf(
+     f: Optional[Union[Callable[..., Any], "DataTypeOrString"]] = None,
+     returnType: "DataTypeOrString" = StringType(),
+     useArrow: Optional[bool] = None,
+ ) -> Union["UserDefinedFunctionLike", Callable[[Callable[..., Any]], "UserDefinedFunctionLike"]]:
+     if f is None or isinstance(f, (str, DataType)):
+         # If DataType has been passed as a positional argument
+         # for decorator use it as a returnType
+         return_type = f or returnType
+         return functools.partial(
+             _create_py_udf,
+             returnType=return_type,
+             useArrow=useArrow,
+         )
+     else:
+         return _create_py_udf(f=f, returnType=returnType, useArrow=useArrow)
+
+
+ udf.__doc__ = pysparkfuncs.udf.__doc__
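
The dispatch above lets udf serve both as a decorator (when f is None or a return type) and as a direct call. A sketch of the two forms, with a hypothetical df:

    from pyspark.sql.functions import udf

    @udf(returnType="int")
    def slen(s: str) -> int:
        return len(s)

    plus_one = udf(lambda x: x + 1, "int")
    df.select(slen("name"), plus_one(df.age))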
+
+
+ def udtf(
+     cls: Optional[Type] = None,
+     *,
+     returnType: Union[StructType, str],
+     useArrow: Optional[bool] = None,
+ ) -> Union["UserDefinedTableFunction", Callable[[Type], "UserDefinedTableFunction"]]:
+     if cls is None:
+         return functools.partial(_create_py_udtf, returnType=returnType, useArrow=useArrow)
+     else:
+         return _create_py_udtf(cls=cls, returnType=returnType, useArrow=useArrow)
+
+
+ udtf.__doc__ = pysparkfuncs.udtf.__doc__
+
+
+ def call_function(funcName: str, *cols: "ColumnOrName") -> Column:
+     expressions = [_to_col(c)._expr for c in cols]
+     return Column(CallFunction(funcName, expressions))
+
+
+ call_function.__doc__ = pysparkfuncs.call_function.__doc__
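
call_function builds a CallFunction expression from any engine-resolvable name, so built-ins and registered functions can be invoked without a dedicated wrapper. A sketch:

    from pyspark.sql.functions import call_function, col, lit

    df.select(call_function("lower", col("name")))
    df.select(call_function("percentile_approx", col("x"), lit(0.5)))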
+
+
+ def _test() -> None:
+     import sys
+     import os
+     import doctest
+     from pyspark.sql import SparkSession as PySparkSession
+     import pyspark.sql.connect.functions
+
+     globs = pyspark.sql.connect.functions.__dict__.copy()
+
+     globs["spark"] = (
+         PySparkSession.builder.appName("sql.connect.functions tests")
+         .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]"))
+         .getOrCreate()
+     )
+
+     (failure_count, test_count) = doctest.testmod(
+         pyspark.sql.connect.functions,
+         globs=globs,
+         optionflags=doctest.ELLIPSIS
+         | doctest.NORMALIZE_WHITESPACE
+         | doctest.IGNORE_EXCEPTION_DETAIL,
+     )
+
+     globs["spark"].stop()
+
+     if failure_count:
+         sys.exit(-1)
+
+
+ if __name__ == "__main__":
+     _test()