snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879)
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2371 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ """
19
+ String functions on pandas-on-Spark Series
20
+ """
21
+ import warnings
22
+ from typing import (
23
+ Any,
24
+ Callable,
25
+ Dict,
26
+ List,
27
+ Optional,
28
+ Union,
29
+ cast,
30
+ no_type_check,
31
+ )
32
+
33
+ import numpy as np
34
+
35
+ import pandas as pd
36
+ from pyspark.sql.types import StringType, BinaryType, ArrayType, LongType, MapType
37
+ from pyspark.sql import functions as F
38
+ from pyspark.sql.functions import pandas_udf
39
+
40
+ import pyspark.pandas as ps
41
+ from pyspark.pandas.spark import functions as SF
42
+
43
+
44
+ class StringMethods:
45
+ """String methods for pandas-on-Spark Series"""
46
+
47
+ def __init__(self, series: "ps.Series"):
48
+ if not isinstance(series.spark.data_type, (StringType, BinaryType, ArrayType)):
49
+ raise ValueError("Cannot call StringMethods on type {}".format(series.spark.data_type))
50
+ self._data = series
51
+
52
+ # Methods
53
+ def capitalize(self) -> "ps.Series":
54
+ """
55
+ Convert Strings in the series to be capitalized.
56
+
57
+ Examples
58
+ --------
59
+ >>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
60
+ >>> s
61
+ 0 lower
62
+ 1 CAPITALS
63
+ 2 this is a sentence
64
+ 3 SwApCaSe
65
+ dtype: object
66
+
67
+ >>> s.str.capitalize()
68
+ 0 Lower
69
+ 1 Capitals
70
+ 2 This is a sentence
71
+ 3 Swapcase
72
+ dtype: object
73
+ """
74
+
75
+ def pandas_capitalize(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
76
+ return s.str.capitalize()
77
+
78
+ return self._data.pandas_on_spark.transform_batch(pandas_capitalize)
79
+
80
+ def title(self) -> "ps.Series":
81
+ """
82
+ Convert Strings in the series to be title case.
83
+
84
+ Examples
85
+ --------
86
+ >>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
87
+ >>> s
88
+ 0 lower
89
+ 1 CAPITALS
90
+ 2 this is a sentence
91
+ 3 SwApCaSe
92
+ dtype: object
93
+
94
+ >>> s.str.title()
95
+ 0 Lower
96
+ 1 Capitals
97
+ 2 This Is A Sentence
98
+ 3 Swapcase
99
+ dtype: object
100
+ """
101
+
102
+ def pandas_title(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
103
+ return s.str.title()
104
+
105
+ return self._data.pandas_on_spark.transform_batch(pandas_title)
106
+
107
+ def lower(self) -> "ps.Series":
108
+ """
109
+ Convert strings in the Series/Index to all lowercase.
110
+
111
+ Examples
112
+ --------
113
+ >>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
114
+ >>> s
115
+ 0 lower
116
+ 1 CAPITALS
117
+ 2 this is a sentence
118
+ 3 SwApCaSe
119
+ dtype: object
120
+
121
+ >>> s.str.lower()
122
+ 0 lower
123
+ 1 capitals
124
+ 2 this is a sentence
125
+ 3 swapcase
126
+ dtype: object
127
+ """
128
+ return self._data.spark.transform(F.lower)
129
+
130
+ def upper(self) -> "ps.Series":
131
+ """
132
+ Convert strings in the Series/Index to all uppercase.
133
+
134
+ Examples
135
+ --------
136
+ >>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
137
+ >>> s
138
+ 0 lower
139
+ 1 CAPITALS
140
+ 2 this is a sentence
141
+ 3 SwApCaSe
142
+ dtype: object
143
+
144
+ >>> s.str.upper()
145
+ 0 LOWER
146
+ 1 CAPITALS
147
+ 2 THIS IS A SENTENCE
148
+ 3 SWAPCASE
149
+ dtype: object
150
+ """
151
+ return self._data.spark.transform(F.upper)
152
+
153
+ def swapcase(self) -> "ps.Series":
154
+ """
155
+ Convert strings in the Series/Index to be swap cased.
156
+
157
+ Examples
158
+ --------
159
+ >>> s = ps.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
160
+ >>> s
161
+ 0 lower
162
+ 1 CAPITALS
163
+ 2 this is a sentence
164
+ 3 SwApCaSe
165
+ dtype: object
166
+
167
+ >>> s.str.swapcase()
168
+ 0 LOWER
169
+ 1 capitals
170
+ 2 THIS IS A SENTENCE
171
+ 3 sWaPcAsE
172
+ dtype: object
173
+ """
174
+
175
+ def pandas_swapcase(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
176
+ return s.str.swapcase()
177
+
178
+ return self._data.pandas_on_spark.transform_batch(pandas_swapcase)
179
+
180
+ def startswith(self, pattern: str, na: Optional[Any] = None) -> "ps.Series":
181
+ """
182
+ Test if the start of each string element matches a pattern.
183
+
184
+ Equivalent to :func:`str.startswith`.
185
+
186
+ Parameters
187
+ ----------
188
+ pattern : str
189
+ Character sequence. Regular expressions are not accepted.
190
+ na : object, default None
191
+ Object shown if element is not a string. NaN converted to None.
192
+
193
+ Returns
194
+ -------
195
+ Series of bool or object
196
+ pandas-on-Spark Series of booleans indicating whether the given pattern
197
+ matches the start of each string element.
198
+
199
+ Examples
200
+ --------
201
+ >>> s = ps.Series(['bat', 'Bear', 'cat', np.nan])
202
+ >>> s
203
+ 0 bat
204
+ 1 Bear
205
+ 2 cat
206
+ 3 None
207
+ dtype: object
208
+
209
+ >>> s.str.startswith('b')
210
+ 0 True
211
+ 1 False
212
+ 2 False
213
+ 3 None
214
+ dtype: object
215
+
216
+ Specifying na to be False instead of None.
217
+
218
+ >>> s.str.startswith('b', na=False)
219
+ 0 True
220
+ 1 False
221
+ 2 False
222
+ 3 False
223
+ dtype: bool
224
+ """
225
+
226
+ def pandas_startswith(s) -> ps.Series[bool]: # type: ignore[no-untyped-def]
227
+ return s.str.startswith(pattern, na)
228
+
229
+ return self._data.pandas_on_spark.transform_batch(pandas_startswith)
230
+
231
+ def endswith(self, pattern: str, na: Optional[Any] = None) -> "ps.Series":
232
+ """
233
+ Test if the end of each string element matches a pattern.
234
+
235
+ Equivalent to :func:`str.endswith`.
236
+
237
+ Parameters
238
+ ----------
239
+ pattern : str
240
+ Character sequence. Regular expressions are not accepted.
241
+ na : object, default None
242
+ Object shown if element is not a string. NaN converted to None.
243
+
244
+ Returns
245
+ -------
246
+ Series of bool or object
247
+ pandas-on-Spark Series of booleans indicating whether the given pattern
248
+ matches the end of each string element.
249
+
250
+ Examples
251
+ --------
252
+ >>> s = ps.Series(['bat', 'Bear', 'cat', np.nan])
253
+ >>> s
254
+ 0 bat
255
+ 1 Bear
256
+ 2 cat
257
+ 3 None
258
+ dtype: object
259
+
260
+ >>> s.str.endswith('t')
261
+ 0 True
262
+ 1 False
263
+ 2 True
264
+ 3 None
265
+ dtype: object
266
+
267
+ Specifying na to be False instead of None.
268
+
269
+ >>> s.str.endswith('t', na=False)
270
+ 0 True
271
+ 1 False
272
+ 2 True
273
+ 3 False
274
+ dtype: bool
275
+ """
276
+
277
+ def pandas_endswith(s) -> ps.Series[bool]: # type: ignore[no-untyped-def]
278
+ return s.str.endswith(pattern, na)
279
+
280
+ return self._data.pandas_on_spark.transform_batch(pandas_endswith)
281
+
282
+ def strip(self, to_strip: Optional[str] = None) -> "ps.Series":
283
+ """
284
+ Remove leading and trailing characters.
285
+
286
+ Strip whitespaces (including newlines) or a set of specified
287
+ characters from each string in the Series/Index from left and
288
+ right sides. Equivalent to :func:`str.strip`.
289
+
290
+ Parameters
291
+ ----------
292
+ to_strip : str
293
+ Specifying the set of characters to be removed. All combinations
294
+ of this set of characters will be stripped. If None then
295
+ whitespaces are removed.
296
+
297
+ Returns
298
+ -------
299
+ Series of objects
300
+
301
+ Examples
302
+ --------
303
+ >>> s = ps.Series(['1. Ant.', '2. Bee!\\t', None])
304
+ >>> s
305
+ 0 1. Ant.
306
+ 1 2. Bee!\\t
307
+ 2 None
308
+ dtype: object
309
+
310
+ >>> s.str.strip()
311
+ 0 1. Ant.
312
+ 1 2. Bee!
313
+ 2 None
314
+ dtype: object
315
+
316
+ >>> s.str.strip('12.')
317
+ 0 Ant
318
+ 1 Bee!\\t
319
+ 2 None
320
+ dtype: object
321
+
322
+ >>> s.str.strip('.!\\t')
323
+ 0 1. Ant
324
+ 1 2. Bee
325
+ 2 None
326
+ dtype: object
327
+ """
328
+
329
+ def pandas_strip(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
330
+ return s.str.strip(to_strip)
331
+
332
+ return self._data.pandas_on_spark.transform_batch(pandas_strip)
333
+
334
+ def lstrip(self, to_strip: Optional[str] = None) -> "ps.Series":
335
+ """
336
+ Remove leading characters.
337
+
338
+ Strip whitespaces (including newlines) or a set of specified
339
+ characters from each string in the Series/Index from left side.
340
+ Equivalent to :func:`str.lstrip`.
341
+
342
+ Parameters
343
+ ----------
344
+ to_strip : str
345
+ Specifying the set of characters to be removed. All combinations
346
+ of this set of characters will be stripped. If None then
347
+ whitespaces are removed.
348
+
349
+ Returns
350
+ -------
351
+ Series of object
352
+
353
+ Examples
354
+ --------
355
+ >>> s = ps.Series(['1. Ant.', '2. Bee!\\t', None])
356
+ >>> s
357
+ 0 1. Ant.
358
+ 1 2. Bee!\\t
359
+ 2 None
360
+ dtype: object
361
+
362
+ >>> s.str.lstrip('12.')
363
+ 0 Ant.
364
+ 1 Bee!\\t
365
+ 2 None
366
+ dtype: object
367
+ """
368
+
369
+ def pandas_lstrip(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
370
+ return s.str.lstrip(to_strip)
371
+
372
+ return self._data.pandas_on_spark.transform_batch(pandas_lstrip)
373
+
374
+ def rstrip(self, to_strip: Optional[str] = None) -> "ps.Series":
375
+ """
376
+ Remove trailing characters.
377
+
378
+ Strip whitespaces (including newlines) or a set of specified
379
+ characters from each string in the Series/Index from right side.
380
+ Equivalent to :func:`str.rstrip`.
381
+
382
+ Parameters
383
+ ----------
384
+ to_strip : str
385
+ Specifying the set of characters to be removed. All combinations
386
+ of this set of characters will be stripped. If None then
387
+ whitespaces are removed.
388
+
389
+ Returns
390
+ -------
391
+ Series of object
392
+
393
+ Examples
394
+ --------
395
+ >>> s = ps.Series(['1. Ant.', '2. Bee!\\t', None])
396
+ >>> s
397
+ 0 1. Ant.
398
+ 1 2. Bee!\\t
399
+ 2 None
400
+ dtype: object
401
+
402
+ >>> s.str.rstrip('.!\\t')
403
+ 0 1. Ant
404
+ 1 2. Bee
405
+ 2 None
406
+ dtype: object
407
+ """
408
+
409
+ def pandas_rstrip(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
410
+ return s.str.rstrip(to_strip)
411
+
412
+ return self._data.pandas_on_spark.transform_batch(pandas_rstrip)
413
+
414
+ def get(self, i: int) -> "ps.Series":
415
+ """
416
+ Extract element from each string or string list/tuple in the Series
417
+ at the specified position.
418
+
419
+ Parameters
420
+ ----------
421
+ i : int
422
+ Position of element to extract.
423
+
424
+ Returns
425
+ -------
426
+ Series of objects
427
+
428
+ Examples
429
+ --------
430
+ >>> s1 = ps.Series(["String", "123"])
431
+ >>> s1
432
+ 0 String
433
+ 1 123
434
+ dtype: object
435
+
436
+ >>> s1.str.get(1)
437
+ 0 t
438
+ 1 2
439
+ dtype: object
440
+
441
+ >>> s1.str.get(-1)
442
+ 0 g
443
+ 1 3
444
+ dtype: object
445
+
446
+ >>> s2 = ps.Series([["a", "b", "c"], ["x", "y"]])
447
+ >>> s2
448
+ 0 [a, b, c]
449
+ 1 [x, y]
450
+ dtype: object
451
+
452
+ >>> s2.str.get(0)
453
+ 0 a
454
+ 1 x
455
+ dtype: object
456
+
457
+ >>> s2.str.get(2)
458
+ 0 c
459
+ 1 None
460
+ dtype: object
461
+ """
462
+
463
+ def pandas_get(s) -> ps.Series[str]: # type: ignore[no-untyped-def]
464
+ return s.str.get(i)
465
+
466
+ return self._data.pandas_on_spark.transform_batch(pandas_get)
467
+
468
+ def isalnum(self) -> "ps.Series":
469
+ """
470
+ Check whether all characters in each string are alphanumeric.
471
+
472
+ This is equivalent to running the Python string method
473
+ :func:`str.isalnum` for each element of the Series/Index.
474
+ If a string has zero characters, False is returned for that check.
475
+
476
+ Examples
477
+ --------
478
+ >>> s1 = ps.Series(['one', 'one1', '1', ''])
479
+
480
+ >>> s1.str.isalnum()
481
+ 0 True
482
+ 1 True
483
+ 2 True
484
+ 3 False
485
+ dtype: bool
486
+
487
+ Note that checks against characters mixed with any additional
488
+ punctuation or whitespace will evaluate too false for an alphanumeric
489
+ check.
490
+
491
+ >>> s2 = ps.Series(['A B', '1.5', '3,000'])
492
+ >>> s2.str.isalnum()
493
+ 0 False
494
+ 1 False
495
+ 2 False
496
+ dtype: bool
497
+ """
498
+
499
+ def pandas_isalnum(s) -> ps.Series[bool]: # type: ignore[no-untyped-def]
500
+ return s.str.isalnum()
501
+
502
+ return self._data.pandas_on_spark.transform_batch(pandas_isalnum)
503
+
504
def isalpha(self) -> "ps.Series":
    """
    Test, element by element, whether a string is entirely alphabetic.

    Every element is evaluated with the rule of the built-in
    :func:`str.isalpha`. An empty string yields False.

    Examples
    --------
    >>> s1 = ps.Series(['one', 'one1', '1', ''])

    >>> s1.str.isalpha()
    0     True
    1    False
    2    False
    3    False
    dtype: bool
    """

    # Runs the string check on the Series handed over by transform_batch.
    def alpha_batch(pser) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return pser.str.isalpha()

    return self._data.pandas_on_spark.transform_batch(alpha_batch)
528
+
529
def isdigit(self) -> "ps.Series":
    """
    Test, element by element, whether a string consists only of digits.

    Every element is evaluated with the rule of the built-in
    :func:`str.isdigit`. An empty string yields False.

    Examples
    --------
    >>> s = ps.Series(['23', '³', '⅕', ''])

    ``s.str.isdecimal`` accepts only the characters used to write numbers
    in base 10.

    >>> s.str.isdecimal()
    0     True
    1    False
    2    False
    3    False
    dtype: bool

    ``s.str.isdigit`` accepts everything ``s.str.isdecimal`` does and, in
    addition, special digits such as Unicode superscripts and subscripts.

    >>> s.str.isdigit()
    0     True
    1     True
    2    False
    3    False
    dtype: bool

    ``s.str.isnumeric`` accepts everything ``s.str.isdigit`` does and, in
    addition, other characters that represent quantities, such as Unicode
    fractions.

    >>> s.str.isnumeric()
    0     True
    1     True
    2     True
    3    False
    dtype: bool
    """

    # Runs the string check on the Series handed over by transform_batch.
    def digit_batch(pser) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return pser.str.isdigit()

    return self._data.pandas_on_spark.transform_batch(digit_batch)
578
+
579
def isspace(self) -> "ps.Series":
    """
    Test, element by element, whether a string is entirely whitespace.

    Every element is evaluated with the rule of the built-in
    :func:`str.isspace`. An empty string yields False.

    Examples
    --------
    >>> s = ps.Series([' ', '\\t\\r\\n ', ''])
    >>> s.str.isspace()
    0     True
    1     True
    2    False
    dtype: bool
    """

    # Runs the string check on the Series handed over by transform_batch.
    def space_batch(pser) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return pser.str.isspace()

    return self._data.pandas_on_spark.transform_batch(space_batch)
601
+
602
def islower(self) -> "ps.Series":
    """
    Check whether all characters in each string are lowercase.

    This is equivalent to running the Python string method
    :func:`str.islower` for each element of the Series/Index.
    If a string has zero characters, False is returned for that check.

    Returns
    -------
    Series of bool
        The result of ``str.islower`` for every element.

    Examples
    --------
    >>> s = ps.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
    >>> s.str.islower()
    0     True
    1    False
    2    False
    3    False
    dtype: bool
    """

    # Named after the check it performs; it was previously called
    # ``pandas_isspace``, a copy-paste leftover from ``isspace``.
    def pandas_islower(s) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return s.str.islower()

    return self._data.pandas_on_spark.transform_batch(pandas_islower)
625
+
626
def isupper(self) -> "ps.Series":
    """
    Check whether all characters in each string are uppercase.

    This is equivalent to running the Python string method
    :func:`str.isupper` for each element of the Series/Index.
    If a string has zero characters, False is returned for that check.

    Returns
    -------
    Series of bool
        The result of ``str.isupper`` for every element.

    Examples
    --------
    >>> s = ps.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
    >>> s.str.isupper()
    0    False
    1    False
    2     True
    3    False
    dtype: bool
    """

    # Named after the check it performs; it was previously called
    # ``pandas_isspace``, a copy-paste leftover from ``isspace``.
    def pandas_isupper(s) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return s.str.isupper()

    return self._data.pandas_on_spark.transform_batch(pandas_isupper)
649
+
650
def istitle(self) -> "ps.Series":
    """
    Test, element by element, whether a string is in title case.

    Every element is evaluated with the rule of the built-in
    :func:`str.istitle`. An empty string yields False.

    Examples
    --------
    >>> s = ps.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])

    ``s.str.istitle`` checks that every word is title-cased, i.e. that
    only the first letter of each word is capitalized. A word is taken to
    be any run of non-numeric characters delimited by whitespace.

    >>> s.str.istitle()
    0    False
    1     True
    2    False
    3    False
    dtype: bool
    """

    # Runs the string check on the Series handed over by transform_batch.
    def title_batch(pser) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return pser.str.istitle()

    return self._data.pandas_on_spark.transform_batch(title_batch)
679
+
680
def isnumeric(self) -> "ps.Series":
    """
    Test, element by element, whether a string is entirely numeric.

    Every element is evaluated with the rule of the built-in
    :func:`str.isnumeric`. An empty string yields False.

    Examples
    --------
    >>> s1 = ps.Series(['one', 'one1', '1', ''])
    >>> s1.str.isnumeric()
    0    False
    1    False
    2     True
    3    False
    dtype: bool

    >>> s2 = ps.Series(['23', '³', '⅕', ''])

    ``s2.str.isdecimal`` accepts only the characters used to write numbers
    in base 10.

    >>> s2.str.isdecimal()
    0     True
    1    False
    2    False
    3    False
    dtype: bool

    ``s2.str.isdigit`` accepts everything ``s2.str.isdecimal`` does and, in
    addition, special digits such as Unicode superscripts and subscripts.

    >>> s2.str.isdigit()
    0     True
    1     True
    2    False
    3    False
    dtype: bool

    ``s2.str.isnumeric`` accepts everything ``s2.str.isdigit`` does and, in
    addition, other characters that represent quantities, such as Unicode
    fractions.

    >>> s2.str.isnumeric()
    0     True
    1     True
    2     True
    3    False
    dtype: bool
    """

    # Runs the string check on the Series handed over by transform_batch.
    def numeric_batch(pser) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return pser.str.isnumeric()

    return self._data.pandas_on_spark.transform_batch(numeric_batch)
737
+
738
def isdecimal(self) -> "ps.Series":
    """
    Test, element by element, whether a string consists only of decimals.

    Every element is evaluated with the rule of the built-in
    :func:`str.isdecimal`. An empty string yields False.

    Examples
    --------
    >>> s = ps.Series(['23', '³', '⅕', ''])

    ``s.str.isdecimal`` accepts only the characters used to write numbers
    in base 10.

    >>> s.str.isdecimal()
    0     True
    1    False
    2    False
    3    False
    dtype: bool

    ``s.str.isdigit`` accepts everything ``s.str.isdecimal`` does and, in
    addition, special digits such as Unicode superscripts and subscripts.

    >>> s.str.isdigit()
    0     True
    1     True
    2    False
    3    False
    dtype: bool

    ``s.str.isnumeric`` accepts everything ``s.str.isdigit`` does and, in
    addition, other characters that represent quantities, such as Unicode
    fractions.

    >>> s.str.isnumeric()
    0     True
    1     True
    2     True
    3    False
    dtype: bool
    """

    # Runs the string check on the Series handed over by transform_batch.
    def decimal_batch(pser) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        return pser.str.isdecimal()

    return self._data.pandas_on_spark.transform_batch(decimal_batch)
787
+
788
@no_type_check
def cat(self, others=None, sep=None, na_rep=None, join=None) -> "ps.Series":
    """
    Unsupported operation.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    raise NotImplementedError
794
+
795
def center(self, width: int, fillchar: str = " ") -> "ps.Series":
    """
    Pad both sides of every string in the Series/Index with a fill
    character, as :func:`str.center` does.

    Parameters
    ----------
    width : int
        Minimum width of the resulting string; the missing characters are
        supplied by ``fillchar``.
    fillchar : str
        Character used for filling, whitespace by default.

    Returns
    -------
    Series of objects

    Examples
    --------
    >>> s = ps.Series(["caribou", "tiger"])
    >>> s
    0    caribou
    1      tiger
    dtype: object

    >>> s.str.center(width=10, fillchar='-')
    0    -caribou--
    1    --tiger---
    dtype: object
    """

    # Centers the strings of the Series handed over by transform_batch.
    def center_batch(pser) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return pser.str.center(width, fillchar)

    return self._data.pandas_on_spark.transform_batch(center_batch)
830
+
831
def contains(
    self, pat: str, case: bool = True, flags: int = 0, na: Any = None, regex: bool = True
) -> "ps.Series":
    """
    Test if pattern or regex is contained within a string of a Series.

    Return boolean Series based on whether a given pattern or regex is
    contained within a string of a Series.

    Analogous to :func:`match`, but less strict, relying on
    :func:`re.search` instead of :func:`re.match`.

    Parameters
    ----------
    pat : str
        Character sequence or regular expression.
    case : bool, default True
        If True, case sensitive.
    flags : int, default 0 (no flags)
        Flags to pass through to the re module, e.g. re.IGNORECASE.
    na : default None
        Fill value for missing values. NaN converted to None.
    regex : bool, default True
        If True, assumes the pat is a regular expression.
        If False, treats the pat as a literal string.

    Returns
    -------
    Series of boolean values or object
        A Series of boolean values indicating whether the given pattern is
        contained within the string of each element of the Series.

    Examples
    --------
    Returning a Series of booleans using only a literal pattern.

    >>> s1 = ps.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
    >>> s1.str.contains('og', regex=False)
    0    False
    1     True
    2    False
    3    False
    4     None
    dtype: object

    Specifying case sensitivity using case.

    >>> s1.str.contains('oG', case=True, regex=True)
    0    False
    1    False
    2    False
    3    False
    4     None
    dtype: object

    Specifying na to be False instead of NaN replaces NaN values with
    False. If Series does not contain NaN values the resultant dtype will
    be bool, otherwise, an object dtype.

    >>> s1.str.contains('og', na=False, regex=True)
    0    False
    1     True
    2    False
    3    False
    4    False
    dtype: bool

    Returning 'house' or 'dog' when either expression occurs in a string.

    >>> s1.str.contains('house|dog', regex=True)
    0    False
    1     True
    2     True
    3    False
    4     None
    dtype: object

    Ignoring case sensitivity using flags with regex.

    >>> import re
    >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True)
    0    False
    1    False
    2     True
    3    False
    4     None
    dtype: object

    Returning any digit using regular expression.

    >>> s1.str.contains('[0-9]', regex=True)
    0    False
    1    False
    2    False
    3     True
    4     None
    dtype: object

    Ensure pat is not a literal pattern when regex is set to True.
    Note in the following example one might expect only s2[1] and s2[3]
    to return True. However, '.0' as a regex matches any character followed
    by a 0.

    >>> s2 = ps.Series(['40','40.0','41','41.0','35'])
    >>> s2.str.contains('.0', regex=True)
    0     True
    1     True
    2    False
    3     True
    4    False
    dtype: bool
    """

    def pandas_contains(s) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        # Forward the options by keyword (as ``replace`` does) so each value
        # is bound to the pandas parameter of the same name.
        return s.str.contains(pat, case=case, flags=flags, na=na, regex=regex)

    return self._data.pandas_on_spark.transform_batch(pandas_contains)
949
+
950
def count(self, pat: str, flags: int = 0) -> "ps.Series":
    """
    Count occurrences of pattern in each string of the Series.

    This function is used to count the number of times a particular regex
    pattern is repeated in each of the string elements of the Series.

    Parameters
    ----------
    pat : str
        Valid regular expression.
    flags : int, default 0 (no flags)
        Flags for the re module.

    Returns
    -------
    Series of int
        A Series containing the integer counts of pattern matches.

    Examples
    --------
    >>> s = ps.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat'])
    >>> s.str.count('a')
    0    0.0
    1    0.0
    2    2.0
    3    2.0
    4    NaN
    5    0.0
    6    1.0
    dtype: float64

    Escape '$' to find the literal dollar sign.

    >>> s = ps.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat'])
    >>> s.str.count('\\$')
    0    1
    1    0
    2    1
    3    2
    4    2
    5    0
    dtype: int64
    """

    def pandas_count(s) -> ps.Series[int]:  # type: ignore[no-untyped-def]
        # ``flags`` is forwarded by keyword so it cannot be mis-bound.
        return s.str.count(pat, flags=flags)

    return self._data.pandas_on_spark.transform_batch(pandas_count)
999
+
1000
@no_type_check
def decode(self, encoding, errors="strict") -> "ps.Series":
    """
    Unsupported operation.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    raise NotImplementedError
1006
+
1007
@no_type_check
def encode(self, encoding, errors="strict") -> "ps.Series":
    """
    Unsupported operation.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    raise NotImplementedError
1013
+
1014
@no_type_check
def extract(self, pat, flags=0, expand=True) -> "ps.Series":
    """
    Unsupported operation.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    raise NotImplementedError
1020
+
1021
@no_type_check
def extractall(self, pat, flags=0) -> "ps.Series":
    """
    Unsupported operation.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    raise NotImplementedError
1027
+
1028
def find(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
    """
    Locate, for each string in the Series, the lowest index at which the
    substring lies completely inside [start:end].

    A miss is reported as -1. Behaves like the standard :func:`str.find`.

    Parameters
    ----------
    sub : str
        Substring to look for.
    start : int
        Left edge index.
    end : int
        Right edge index.

    Returns
    -------
    Series of int
        Series of lowest matching indexes.

    Examples
    --------
    >>> s = ps.Series(['apple', 'oranges', 'bananas'])

    >>> s.str.find('a')
    0    0
    1    2
    2    1
    dtype: int64

    >>> s.str.find('a', start=2)
    0   -1
    1    2
    2    3
    dtype: int64

    >>> s.str.find('a', end=1)
    0    0
    1   -1
    2   -1
    dtype: int64

    >>> s.str.find('a', start=2, end=2)
    0   -1
    1   -1
    2   -1
    dtype: int64
    """

    # Searches the strings of the Series handed over by transform_batch.
    def find_batch(pser) -> ps.Series[int]:  # type: ignore[no-untyped-def]
        return pser.str.find(sub, start, end)

    return self._data.pandas_on_spark.transform_batch(find_batch)
1082
+
1083
def findall(self, pat: str, flags: int = 0) -> "ps.Series":
    """
    Collect every occurrence of a pattern or regular expression in each
    string of the Series.

    Corresponds to applying :func:`re.findall` to every element of the
    Series.

    Parameters
    ----------
    pat : str
        Pattern or regular expression.
    flags : int, default 0 (no flags)
        `re` module flags, e.g. `re.IGNORECASE`.

    Returns
    -------
    Series of object
        All non-overlapping matches of pattern or regular expression in
        each string of this Series.

    Examples
    --------
    >>> s = ps.Series(['Lion', 'Monkey', 'Rabbit'])

    Searching for the pattern 'Monkey' gives one match:

    >>> s.str.findall('Monkey')
    0          []
    1    [Monkey]
    2          []
    dtype: object

    Searching for the pattern 'MONKEY', by contrast, gives no match:

    >>> s.str.findall('MONKEY')
    0    []
    1    []
    2    []
    dtype: object

    Flags can be combined with the pattern or regular expression. To find
    the pattern 'MONKEY' while ignoring case:

    >>> import re
    >>> s.str.findall('MONKEY', flags=re.IGNORECASE)
    0          []
    1    [Monkey]
    2          []
    dtype: object

    If the pattern matches in more than one string of the Series, every
    match is returned:

    >>> s.str.findall('on')
    0    [on]
    1    [on]
    2      []
    dtype: object

    Regular expressions work as well. Searching for all strings that end
    with 'on':

    >>> s.str.findall('on$')
    0    [on]
    1      []
    2      []
    dtype: object

    When the pattern occurs several times in one string, a list with
    several strings is returned:

    >>> s.str.findall('b')
    0        []
    1        []
    2    [b, b]
    dtype: object
    """
    # Type hints cannot express an array type yet, so the return type is
    # passed to pandas_udf explicitly.
    list_of_str = ArrayType(StringType(), containsNull=True)

    @pandas_udf(returnType=list_of_str)  # type: ignore[call-overload]
    def collect_matches(pser: pd.Series) -> pd.Series:
        return pser.str.findall(pat, flags)

    matches_col = collect_matches(self._data.spark.column)
    return self._data._with_new_scol(scol=matches_col)
1169
+
1170
def index(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
    """
    Locate, for each string, the lowest index at which the substring lies
    completely inside [start:end].

    Identical to :func:`str.find`, except that a ValueError is raised
    instead of returning -1 when the substring is absent. Behaves like the
    standard :func:`str.index`.

    Parameters
    ----------
    sub : str
        Substring to look for.
    start : int
        Left edge index.
    end : int
        Right edge index.

    Returns
    -------
    Series of int
        Series of lowest matching indexes.

    Examples
    --------
    >>> s = ps.Series(['apple', 'oranges', 'bananas'])

    >>> s.str.index('a')
    0    0
    1    2
    2    1
    dtype: int64

    The next expression raises an exception:

    >>> s.str.index('a', start=2)  # doctest: +SKIP
    """

    # Searches the strings of the Series handed over by transform_batch.
    def index_batch(pser) -> ps.Series[np.int64]:  # type: ignore[no-untyped-def]
        return pser.str.index(sub, start, end)

    return self._data.pandas_on_spark.transform_batch(index_batch)
1212
+
1213
def join(self, sep: str) -> "ps.Series":
    """
    Concatenate the lists stored as elements of the Series using the given
    delimiter.

    When the elements of a Series are themselves lists, their contents are
    joined with the delimiter passed in, which corresponds to calling
    :func:`str.join` on each list.

    Parameters
    ----------
    sep : str
        Delimiter placed between list entries.

    Returns
    -------
    Series of object
        Series with list entries concatenated by intervening occurrences of
        the delimiter.

    See Also
    --------
    str.split : Split strings around given separator/delimiter.
    str.rsplit : Splits string around given separator/delimiter,
        starting from the right.

    Examples
    --------
    A list holding a None element.

    >>> s = ps.Series([['lion', 'elephant', 'zebra'],
    ...                ['cat', None, 'dog']])
    >>> s
    0    [lion, elephant, zebra]
    1           [cat, None, dog]
    dtype: object

    Joining every list with '-'. The list that contains None yields None.

    >>> s.str.join('-')
    0    lion-elephant-zebra
    1                   None
    dtype: object
    """

    # Joins the list elements of the Series handed over by transform_batch.
    def join_batch(pser) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return pser.str.join(sep)

    return self._data.pandas_on_spark.transform_batch(join_batch)
1261
+
1262
def len(self) -> "ps.Series":
    """
    Compute the length of every element in the Series.

    An element may be a sequence (such as a string, tuple or list).

    Returns
    -------
    Series of int
        A Series of integer values indicating the length of each element in
        the Series.

    Examples
    --------
    For a string the length is its number of characters; for lists or
    tuples it is the number of entries.

    >>> s1 = ps.Series(['dog', 'monkey'])
    >>> s1.str.len()
    0    3
    1    6
    dtype: int64

    >>> s2 = ps.Series([["a", "b", "c"], []])
    >>> s2.str.len()
    0    3
    1    0
    dtype: int64
    """
    # Array and map columns are measured with F.size, every other column
    # with F.length; either result is cast to LongType.
    dtype = self._data.spark.data_type
    if isinstance(dtype, (ArrayType, MapType)):
        measure = F.size
    else:
        measure = F.length
    return self._data.spark.transform(lambda c: measure(c).cast(LongType()))
1295
+
1296
def ljust(self, width: int, fillchar: str = " ") -> "ps.Series":
    """
    Pad the right side of every string in the Series with a fill
    character, as :func:`str.ljust` does.

    Parameters
    ----------
    width : int
        Minimum width of the resulting string; the missing characters are
        supplied by `fillchar`.
    fillchar : str
        Character used for filling, whitespace by default.

    Returns
    -------
    Series of object

    Examples
    --------
    >>> s = ps.Series(["caribou", "tiger"])
    >>> s
    0    caribou
    1      tiger
    dtype: object

    >>> s.str.ljust(width=10, fillchar='-')
    0    caribou---
    1    tiger-----
    dtype: object
    """

    # Pads the strings of the Series handed over by transform_batch.
    def ljust_batch(pser) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return pser.str.ljust(width, fillchar)

    return self._data.pandas_on_spark.transform_batch(ljust_batch)
1331
+
1332
def match(self, pat: str, case: bool = True, flags: int = 0, na: Any = np.nan) -> "ps.Series":
    """
    Determine if each string matches a regular expression.

    Analogous to :func:`contains`, but more strict, relying on
    :func:`re.match` instead of :func:`re.search`.

    Parameters
    ----------
    pat : str
        Character sequence or regular expression.
    case : bool, default True
        If True, case sensitive.
    flags : int, default 0 (no flags)
        Flags to pass through to the re module, e.g. re.IGNORECASE.
    na : default NaN
        Fill value for missing values.

    Returns
    -------
    Series of boolean values or object
        A Series of boolean values indicating whether the given pattern can
        be matched in the string of each element of the Series.

    Examples
    --------
    >>> s = ps.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
    >>> s.str.match('dog')
    0    False
    1     True
    2    False
    3    False
    4     None
    dtype: object

    >>> s.str.match('mouse|dog', case=False)
    0     True
    1     True
    2    False
    3    False
    4     None
    dtype: object

    >>> s.str.match('.+and.+', na=True)
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    >>> import re
    >>> s.str.match('MOUSE', flags=re.IGNORECASE)
    0     True
    1    False
    2    False
    3    False
    4     None
    dtype: object
    """
    # NOTE: the default is spelled ``np.nan``; the ``np.NaN`` alias was
    # removed in NumPy 2.0 and would fail when this method is defined.

    def pandas_match(s) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
        # Forward the options by keyword (as ``replace`` does) so each value
        # is bound to the pandas parameter of the same name.
        return s.str.match(pat, case=case, flags=flags, na=na)

    return self._data.pandas_on_spark.transform_batch(pandas_match)
1397
+
1398
def normalize(self, form: str) -> "ps.Series":
    """
    Produce the Unicode normal form of every string in the Series.

    Details about the available forms are given in
    :func:`unicodedata.normalize`.

    Parameters
    ----------
    form : {'NFC', 'NFKC', 'NFD', 'NFKD'}
        Unicode form.

    Returns
    -------
    Series of objects
        A Series of normalized strings.
    """

    # Normalizes the strings of the Series handed over by transform_batch.
    def normalize_batch(pser) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return pser.str.normalize(form)

    return self._data.pandas_on_spark.transform_batch(normalize_batch)
1420
+
1421
def pad(self, width: int, side: str = "left", fillchar: str = " ") -> "ps.Series":
    """
    Pad the strings in the Series until they reach the given width.

    Parameters
    ----------
    width : int
        Minimum width of the resulting string; the missing characters are
        supplied by the character given in `fillchar`.
    side : {'left', 'right', 'both'}, default 'left'
        Side on which the resulting string is filled.
    fillchar : str, default ' '
        Character used for filling, whitespace by default.

    Returns
    -------
    Series of object
        Returns Series with minimum number of char in object.

    Examples
    --------
    >>> s = ps.Series(["caribou", "tiger"])
    >>> s
    0    caribou
    1      tiger
    dtype: object

    >>> s.str.pad(width=10)
    0       caribou
    1         tiger
    dtype: object

    >>> s.str.pad(width=10, side='right', fillchar='-')
    0    caribou---
    1    tiger-----
    dtype: object

    >>> s.str.pad(width=10, side='both', fillchar='-')
    0    -caribou--
    1    --tiger---
    dtype: object
    """

    # Pads the strings of the Series handed over by transform_batch.
    def pad_batch(pser) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return pser.str.pad(width, side, fillchar)

    return self._data.pandas_on_spark.transform_batch(pad_batch)
1468
+
1469
def partition(self, sep: str = " ", expand: bool = True) -> "ps.Series":
    """
    Unsupported operation.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments.
    """
    raise NotImplementedError
1474
+
1475
def repeat(self, repeats: int) -> "ps.Series":
    """
    Repeat every string in the Series a fixed number of times.

    Parameters
    ----------
    repeats : int
        How many times each string is repeated. A sequence of int is not
        supported.

    Returns
    -------
    Series of object
        Series or Index of repeated string objects specified by input
        parameter repeats.

    Raises
    ------
    TypeError
        If ``repeats`` is not an int.

    Examples
    --------
    >>> s = ps.Series(['a', 'b', 'c'])
    >>> s
    0    a
    1    b
    2    c
    dtype: object

    A single int repeats each string in the Series.

    >>> s.str.repeat(repeats=2)
    0    aa
    1    bb
    2    cc
    dtype: object
    """
    if isinstance(repeats, int):
        return self._data.spark.transform(lambda c: SF.repeat(col=c, n=repeats))
    # Anything that is not an int is rejected before the column is touched.
    raise TypeError("repeats expects an int parameter")
1511
+
1512
def replace(
    self,
    pat: str,
    repl: Union[str, Callable[[str], str]],
    n: int = -1,
    case: Optional[bool] = None,
    flags: int = 0,
    regex: bool = True,
) -> "ps.Series":
    """
    Replace occurrences of pattern/regex in the Series with some other
    string. Equivalent to :func:`str.replace` or :func:`re.sub`.

    A ``FutureWarning`` about the upcoming change of the ``regex`` default
    is emitted on every call.

    Parameters
    ----------
    pat : str or compiled regex
        String can be a character sequence or regular expression.
    repl : str or callable
        Replacement string or a callable. The callable is passed the regex
        match object and must return a replacement string to be used. See
        :func:`re.sub`.
    n : int, default -1 (all)
        Number of replacements to make from start.
    case : boolean, default None
        If True, case sensitive (the default if pat is a string).
        Set to False for case insensitive.
        Cannot be set if pat is a compiled regex.
    flags : int, default 0 (no flags)
        re module flags, e.g. re.IGNORECASE.
        Cannot be set if pat is a compiled regex.
    regex : boolean, default True
        If True, assumes the passed-in pattern is a regular expression.
        If False, treats the pattern as a literal string.
        Cannot be set to False if pat is a compiled regex or repl is a
        callable.

    Returns
    -------
    Series of object
        A copy of the string with all matching occurrences of pat replaced
        by repl.

    Examples
    --------
    When pat is a string and regex is True (the default), the given pat is
    compiled as a regex. When repl is a string, it replaces matching regex
    patterns as with :func:`re.sub`. NaN value(s) in the Series are changed
    to None:

    >>> ps.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True)
    0     bao
    1     baz
    2    None
    dtype: object

    When pat is a string and regex is False, every pat is replaced with
    repl as with :func:`str.replace`:

    >>> ps.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False)
    0     bao
    1     fuz
    2    None
    dtype: object

    When repl is a callable, it is called on every pat using
    :func:`re.sub`. The callable should expect one positional argument (a
    regex object) and return a string.

    Reverse every lowercase alphabetic word:

    >>> repl = lambda m: m.group(0)[::-1]
    >>> ps.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl)
    0    oof 123
    1    rab zab
    2       None
    dtype: object

    Using regex groups (extract second group and swap case):

    >>> pat = r"(?P<one>\\w+) (?P<two>\\w+) (?P<three>\\w+)"
    >>> repl = lambda m: m.group('two').swapcase()
    >>> ps.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl)
    0    tWO
    1    bAR
    dtype: object

    Using a compiled regex with flags:

    >>> import re
    >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE)
    >>> ps.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar')
    0     foo
    1     bar
    2    None
    dtype: object
    """
    # stacklevel=2 attributes the warning to the caller's line instead of
    # this library line, so users can see which call needs attention.
    warnings.warn(
        "Default value of `regex` will be changed to `False` instead of `True` in 4.0.0.",
        FutureWarning,
        stacklevel=2,
    )

    def pandas_replace(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return s.str.replace(pat, repl, n=n, case=case, flags=flags, regex=regex)

    return self._data.pandas_on_spark.transform_batch(pandas_replace)
1617
+
1618
def rfind(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
    """
    Locate, for each string in the Series, the highest index at which the
    substring lies completely inside [start:end].

    A miss is reported as -1. Behaves like the standard :func:`str.rfind`.

    Parameters
    ----------
    sub : str
        Substring to look for.
    start : int
        Left edge index.
    end : int
        Right edge index.

    Returns
    -------
    Series of int
        Series of highest matching indexes.

    Examples
    --------
    >>> s = ps.Series(['apple', 'oranges', 'bananas'])

    >>> s.str.rfind('a')
    0    0
    1    2
    2    5
    dtype: int64

    >>> s.str.rfind('a', start=2)
    0   -1
    1    2
    2    5
    dtype: int64

    >>> s.str.rfind('a', end=1)
    0    0
    1   -1
    2   -1
    dtype: int64

    >>> s.str.rfind('a', start=2, end=2)
    0   -1
    1   -1
    2   -1
    dtype: int64
    """

    # Searches the strings of the Series handed over by transform_batch.
    def rfind_batch(pser) -> ps.Series[int]:  # type: ignore[no-untyped-def]
        return pser.str.rfind(sub, start, end)

    return self._data.pandas_on_spark.transform_batch(rfind_batch)
1672
+
1673
def rindex(self, sub: str, start: int = 0, end: Optional[int] = None) -> "ps.Series":
    """
    Find the last occurrence of a substring in every string, failing if absent.

    For each element, return the highest index at which `sub` is fully
    contained within the slice ``[start:end]``.

    This is the same as :func:`str.rfind` except instead of returning -1,
    it raises a ValueError when the substring is not found. Equivalent to
    standard :func:`str.rindex`.

    Parameters
    ----------
    sub : str
        Substring being searched.
    start : int
        Left edge index.
    end : int
        Right edge index.

    Returns
    -------
    Series of int
        Series of highest matching indexes.

    Examples
    --------
    >>> s = ps.Series(['apple', 'oranges', 'bananas'])

    >>> s.str.rindex('a')
    0    0
    1    2
    2    5
    dtype: int64

    The following expression throws an exception:

    >>> s.str.rindex('a', start=2)  # doctest: +SKIP
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def index_last(s) -> ps.Series[np.int64]:  # type: ignore[no-untyped-def]
        return s.str.rindex(sub, start=start, end=end)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(index_last)
1715
+
1716
def rjust(self, width: int, fillchar: str = " ") -> "ps.Series":
    """
    Right-justify the strings in the Series by padding their left side.

    Equivalent to :func:`str.rjust`.

    Parameters
    ----------
    width : int
        Minimum width of resulting string; additional characters will be
        filled with `fillchar`.
    fillchar : str
        Additional character for filling, default is whitespace.

    Returns
    -------
    Series of object

    Examples
    --------
    >>> s = ps.Series(["caribou", "tiger"])
    >>> s
    0    caribou
    1      tiger
    dtype: object

    >>> s.str.rjust(width=10)
    0       caribou
    1         tiger
    dtype: object

    >>> s.str.rjust(width=10, fillchar='-')
    0    ---caribou
    1    -----tiger
    dtype: object
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def pad_left(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return s.str.rjust(width, fillchar=fillchar)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(pad_left)
1756
+
1757
def rpartition(self, sep: str = " ", expand: bool = True) -> "ps.Series":
    """
    Not supported.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments given.
    """
    raise NotImplementedError
1762
+
1763
def slice(
    self, start: Optional[int] = None, stop: Optional[int] = None, step: Optional[int] = None
) -> "ps.Series":
    """
    Take the ``[start:stop:step]`` substring of each element in the Series.

    Parameters
    ----------
    start : int, optional
        Start position for slice operation.
    stop : int, optional
        Stop position for slice operation.
    step : int, optional
        Step size for slice operation.

    Returns
    -------
    Series of object
        Series from sliced substrings from original string objects.

    Examples
    --------
    >>> s = ps.Series(["koala", "fox", "chameleon"])
    >>> s
    0        koala
    1          fox
    2    chameleon
    dtype: object

    >>> s.str.slice(start=1)
    0        oala
    1          ox
    2    hameleon
    dtype: object

    >>> s.str.slice(stop=2)
    0    ko
    1    fo
    2    ch
    dtype: object

    >>> s.str.slice(step=2)
    0      kaa
    1       fx
    2    caeen
    dtype: object

    >>> s.str.slice(start=0, stop=5, step=3)
    0    kl
    1     f
    2    cm
    dtype: object
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def take_slice(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return s.str.slice(start=start, stop=stop, step=step)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(take_slice)
1821
+
1822
def slice_replace(
    self, start: Optional[int] = None, stop: Optional[int] = None, repl: Optional[str] = None
) -> "ps.Series":
    """
    Replace a positional slice of each string in the Series with another value.

    Parameters
    ----------
    start : int, optional
        Start position for slice operation. If not specified (None), the
        slice is unbounded on the left, i.e. slice from the start of the
        string.
    stop : int, optional
        Stop position for slice operation. If not specified (None), the
        slice is unbounded on the right, i.e. slice until the end of the
        string.
    repl : str, optional
        String for replacement. If not specified (None), the sliced region
        is replaced with an empty string.

    Returns
    -------
    Series of object
        Series of the original strings with the selected slice replaced.

    Examples
    --------
    >>> s = ps.Series(['a', 'ab', 'abc', 'abdc', 'abcde'])
    >>> s
    0        a
    1       ab
    2      abc
    3     abdc
    4    abcde
    dtype: object

    Specify just start, meaning replace start until the end of the string
    with repl.

    >>> s.str.slice_replace(1, repl='X')
    0    aX
    1    aX
    2    aX
    3    aX
    4    aX
    dtype: object

    Specify just stop, meaning the start of the string to stop is replaced
    with repl, and the rest of the string is included.

    >>> s.str.slice_replace(stop=2, repl='X')
    0       X
    1       X
    2      Xc
    3     Xdc
    4    Xcde
    dtype: object

    Specify start and stop, meaning the slice from start to stop is
    replaced with repl. Everything before or after start and stop is
    included as is.

    >>> s.str.slice_replace(start=1, stop=3, repl='X')
    0      aX
    1      aX
    2      aX
    3     aXc
    4    aXde
    dtype: object
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def replace_slice(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return s.str.slice_replace(start=start, stop=stop, repl=repl)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(replace_slice)
1897
+
1898
def split(
    self, pat: Optional[str] = None, n: int = -1, expand: bool = False
) -> Union["ps.Series", "ps.DataFrame"]:
    """
    Split strings around given separator/delimiter.

    Splits the string in the Series from the beginning, at the specified
    delimiter string. Equivalent to :func:`str.split`.

    Parameters
    ----------
    pat : str, optional
        String or regular expression to split on. If not specified, split
        on whitespace.
    n : int, default -1 (all)
        Limit number of splits in output. None, 0 and -1 will be
        interpreted as return all splits.
    expand : bool, default False
        Expand the split strings into separate columns.

        * If ``True``, `n` must be a positive integer, and return DataFrame expanding
          dimensionality.
        * If ``False``, return Series, containing lists of strings.

    Returns
    -------
    Series, DataFrame
        Type matches caller unless `expand=True` (see Notes).

    Raises
    ------
    NotImplementedError
        If ``expand=True`` and `n` is None, zero or negative.

    See Also
    --------
    str.rsplit : Splits string around given separator/delimiter,
        starting from the right.
    str.join : Join lists contained as elements in the Series/Index
        with passed delimiter.

    Notes
    -----
    The handling of the `n` keyword depends on the number of found splits:

    - If found splits > `n`,  make first `n` splits only
    - If found splits <= `n`, make all splits
    - If for a certain row the number of found splits < `n`,
      append `None` for padding up to `n` if ``expand=True``

    If using ``expand=True``, Series callers return DataFrame objects with `n + 1` columns.

    .. note:: Even if `n` is much larger than found splits, the number of columns does NOT
        shrink unlike pandas.

    Examples
    --------
    >>> s = ps.Series(["this is a regular sentence",
    ...                "https://docs.python.org/3/tutorial/index.html",
    ...                np.nan])

    In the default setting, the string is split by whitespace.

    >>> s.str.split()  # doctest: +SKIP
    0                   [this, is, a, regular, sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    Without the n parameter, the outputs of rsplit and split are identical.

    >>> s.str.rsplit()  # doctest: +SKIP
    0                   [this, is, a, regular, sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    The n parameter can be used to limit the number of splits on the
    delimiter. The outputs of split and rsplit are different.

    >>> s.str.split(n=2)  # doctest: +SKIP
    0                     [this, is, a regular sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    >>> s.str.rsplit(n=2)  # doctest: +SKIP
    0                     [this is a, regular, sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    The pat parameter can be used to split by other characters.

    >>> s.str.split(pat = "/")  # doctest: +SKIP
    0                         [this is a regular sentence]
    1    [https:, , docs.python.org, 3, tutorial, index...
    2                                                 None
    dtype: object

    When using ``expand=True``, the split elements will expand out into
    separate columns. If NaN is present, it is propagated throughout
    the columns during the split.

    >>> s.str.split(n=4, expand=True)  # doctest: +SKIP
                                                   0     1     2        3         4
    0                                           this    is     a  regular  sentence
    1  https://docs.python.org/3/tutorial/index.html  None  None     None      None
    2                                           None  None  None     None      None

    For slightly more complex use cases like splitting the html document name
    from a url, a combination of parameter settings can be used.

    >>> s.str.rsplit("/", n=1, expand=True)  # doctest: +SKIP
                                        0           1
    0          this is a regular sentence        None
    1  https://docs.python.org/3/tutorial  index.html
    2                                None        None

    Remember to escape special characters when explicitly using regular
    expressions.

    >>> s = ps.Series(["1+1=2"])
    >>> s.str.split(r"\\+|=", n=2, expand=True)  # doctest: +SKIP
       0  1  2
    0  1  1  2
    """
    # Validate first so that an unsupported request fails fast, before any
    # import or Spark plan construction. `n` may be None (documented as
    # "all splits"); comparing None with 0 would raise TypeError, so it is
    # checked explicitly.
    if expand and (n is None or n <= 0):
        raise NotImplementedError("expand=True is currently only supported with n > 0.")

    from pyspark.pandas.frame import DataFrame

    # type hint does not support to specify array type yet.
    return_type = ArrayType(StringType(), containsNull=True)

    @pandas_udf(returnType=return_type)  # type: ignore[call-overload]
    def pudf(s: pd.Series) -> pd.Series:
        # `n` is passed by keyword: pandas 2.0 made every argument of
        # ``Series.str.split`` after `pat` keyword-only, so a positional
        # `n` raises TypeError there.
        return s.str.split(pat, n=n)

    psser = self._data._with_new_scol(
        pudf(self._data.spark.column).alias(self._data._internal.data_spark_column_names[0]),
        field=self._data._internal.data_fields[0].copy(spark_type=return_type, nullable=True),
    )

    if expand:
        psdf = psser.to_frame()
        scol = psdf._internal.data_spark_columns[0]
        # One output column per possible piece: `n` splits give at most n + 1 pieces.
        positions = range(n + 1)
        spark_columns = [scol[i].alias(str(i)) for i in positions]
        column_labels = [(i,) for i in positions]
        internal = psdf._internal.with_new_columns(
            spark_columns,
            column_labels=cast(Optional[List], column_labels),
            data_fields=[
                self._data._internal.data_fields[0].copy(name=str(i), nullable=True)
                for i in positions
            ],
        )
        return DataFrame(internal)
    else:
        return psser
2053
+
2054
def rsplit(
    self, pat: Optional[str] = None, n: int = -1, expand: bool = False
) -> Union["ps.Series", "ps.DataFrame"]:
    """
    Split strings around given separator/delimiter.

    Splits the string in the Series from the end, at the specified
    delimiter string. Equivalent to :func:`str.rsplit`.

    Parameters
    ----------
    pat : str, optional
        String or regular expression to split on. If not specified, split
        on whitespace.
    n : int, default -1 (all)
        Limit number of splits in output. None, 0 and -1 will be
        interpreted as return all splits.
    expand : bool, default False
        Expand the split strings into separate columns.

        * If ``True``, `n` must be a positive integer, and return DataFrame expanding
          dimensionality.
        * If ``False``, return Series, containing lists of strings.

    Returns
    -------
    Series, DataFrame
        Type matches caller unless `expand=True` (see Notes).

    Raises
    ------
    NotImplementedError
        If ``expand=True`` and `n` is None, zero or negative.

    See Also
    --------
    str.split : Split strings around given separator/delimiter.
    str.join : Join lists contained as elements in the Series/Index
        with passed delimiter.

    Notes
    -----
    The handling of the `n` keyword depends on the number of found splits:

    - If found splits > `n`,  make first `n` splits only
    - If found splits <= `n`, make all splits
    - If for a certain row the number of found splits < `n`,
      append `None` for padding up to `n` if ``expand=True``

    If using ``expand=True``, Series callers return DataFrame objects with `n + 1` columns.

    .. note:: Even if `n` is much larger than found splits, the number of columns does NOT
        shrink unlike pandas.

    Examples
    --------
    >>> s = ps.Series(["this is a regular sentence",
    ...                "https://docs.python.org/3/tutorial/index.html",
    ...                np.nan])

    In the default setting, the string is split by whitespace.

    >>> s.str.split()  # doctest: +SKIP
    0                   [this, is, a, regular, sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    Without the n parameter, the outputs of rsplit and split are identical.

    >>> s.str.rsplit()  # doctest: +SKIP
    0                   [this, is, a, regular, sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    The n parameter can be used to limit the number of splits on the
    delimiter. The outputs of split and rsplit are different.

    >>> s.str.split(n=2)  # doctest: +SKIP
    0                     [this, is, a regular sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    >>> s.str.rsplit(n=2)  # doctest: +SKIP
    0                     [this is a, regular, sentence]
    1    [https://docs.python.org/3/tutorial/index.html]
    2                                               None
    dtype: object

    When using ``expand=True``, the split elements will expand out into
    separate columns. If NaN is present, it is propagated throughout
    the columns during the split.

    >>> s.str.split(n=4, expand=True)  # doctest: +SKIP
                                                   0     1     2        3         4
    0                                           this    is     a  regular  sentence
    1  https://docs.python.org/3/tutorial/index.html  None  None     None      None
    2                                           None  None  None     None      None

    For slightly more complex use cases like splitting the html document name
    from a url, a combination of parameter settings can be used.

    >>> s.str.rsplit("/", n=1, expand=True)  # doctest: +SKIP
                                        0           1
    0          this is a regular sentence        None
    1  https://docs.python.org/3/tutorial  index.html
    2                                None        None

    Remember to escape special characters when explicitly using regular
    expressions.

    >>> s = ps.Series(["1+1=2"])
    >>> s.str.split(r"\\+|=", n=2, expand=True)  # doctest: +SKIP
       0  1  2
    0  1  1  2
    """
    # Validate first so that an unsupported request fails fast, before any
    # import or Spark plan construction. `n` may be None (documented as
    # "all splits"); comparing None with 0 would raise TypeError, so it is
    # checked explicitly.
    if expand and (n is None or n <= 0):
        raise NotImplementedError("expand=True is currently only supported with n > 0.")

    from pyspark.pandas.frame import DataFrame

    # type hint does not support to specify array type yet.
    return_type = ArrayType(StringType(), containsNull=True)

    @pandas_udf(returnType=return_type)  # type: ignore[call-overload]
    def pudf(s: pd.Series) -> pd.Series:
        # `n` is passed by keyword: pandas 2.0 made every argument of
        # ``Series.str.rsplit`` after `pat` keyword-only, so a positional
        # `n` raises TypeError there.
        return s.str.rsplit(pat, n=n)

    psser = self._data._with_new_scol(
        pudf(self._data.spark.column).alias(self._data._internal.data_spark_column_names[0]),
        field=self._data._internal.data_fields[0].copy(spark_type=return_type, nullable=True),
    )

    if expand:
        psdf = psser.to_frame()
        scol = psdf._internal.data_spark_columns[0]
        # One output column per possible piece: `n` splits give at most n + 1 pieces.
        positions = range(n + 1)
        spark_columns = [scol[i].alias(str(i)) for i in positions]
        column_labels = [(i,) for i in positions]
        internal = psdf._internal.with_new_columns(
            spark_columns,
            column_labels=cast(Optional[List], column_labels),
            data_fields=[
                self._data._internal.data_fields[0].copy(name=str(i), nullable=True)
                for i in positions
            ],
        )
        return DataFrame(internal)
    else:
        return psser
2200
+
2201
def translate(self, table: Dict) -> "ps.Series":
    """
    Translate the characters of every string through a mapping table.

    Equivalent to standard :func:`str.translate`.

    Parameters
    ----------
    table : dict
        Table is a mapping of Unicode ordinals to Unicode ordinals,
        strings, or None. Unmapped characters are left untouched.
        Characters mapped to None are deleted. :func:`str.maketrans` is a
        helper function for making translation tables.

    Returns
    -------
    Series of object
        Series with translated strings.

    Examples
    --------
    >>> s = ps.Series(["dog", "cat", "bird"])
    >>> m = str.maketrans({'a': 'X', 'i': 'Y', 'o': None})
    >>> s.str.translate(m)
    0      dg
    1     cXt
    2    bYrd
    dtype: object
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def map_characters(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return s.str.translate(table)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(map_characters)
2234
+
2235
def wrap(self, width: int, **kwargs: bool) -> "ps.Series":
    """
    Break long strings in the Series into newline-separated lines.

    The strings are formatted in paragraphs with length less than a given
    width. This method has the same keyword parameters and defaults as
    :class:`textwrap.TextWrapper`.

    Parameters
    ----------
    width : int
        Maximum line-width. Lines separated with newline char.
    expand_tabs : bool, optional
        If true, tab characters will be expanded to spaces (default: True).
    replace_whitespace : bool, optional
        If true, each whitespace character remaining after tab expansion
        will be replaced by a single space (default: True).
    drop_whitespace : bool, optional
        If true, whitespace that, after wrapping, happens to end up at the
        beginning or end of a line is dropped (default: True).
    break_long_words : bool, optional
        If true, then words longer than width will be broken to
        ensure that no lines are longer than width. If it is false, long
        words will not be broken, and some lines may be longer than width
        (default: True).
    break_on_hyphens : bool, optional
        If true, wrapping will occur preferably on whitespace and right
        after hyphens in compound words, as it is customary in English.
        If false, only whitespaces will be considered as potentially good
        places for line breaks, but you need to set break_long_words to
        false if you want truly insecable words (default: True).

    Returns
    -------
    Series of object
        Series with wrapped strings.

    Examples
    --------
    >>> s = ps.Series(['line to be wrapped', 'another line to be wrapped'])
    >>> s.str.wrap(12)
    0             line to be\\nwrapped
    1    another line\\nto be\\nwrapped
    dtype: object
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def wrap_lines(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        # All keyword options are forwarded untouched to pandas.
        return s.str.wrap(width, **kwargs)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(wrap_lines)
2285
+
2286
def zfill(self, width: int) -> "ps.Series":
    """
    Pad strings in the Series by prepending '0' characters.

    Strings in the Series are padded with '0' characters on the left of the
    string to reach a total string length width. Strings in the Series with
    length greater or equal to width are unchanged.

    Differs from :func:`str.zfill` which has special handling for '+'/'-'
    in the string.

    Parameters
    ----------
    width : int
        Minimum length of resulting string; strings with length less than
        width be prepended with '0' characters.

    Returns
    -------
    Series of object
        Series with '0' left-padded strings.

    Examples
    --------
    >>> s = ps.Series(['-1', '1', '1000', np.nan])
    >>> s
    0      -1
    1       1
    2    1000
    3    None
    dtype: object

    Note that NaN is not a string, therefore it is converted to NaN. The
    minus sign in '-1' is treated as a regular character and the zero is
    added to the left of it (:func:`str.zfill` would have moved it to the
    left). 1000 remains unchanged as it is longer than width.

    >>> s.str.zfill(3)  # doctest: +SKIP
    0     -01
    1     001
    2    1000
    3    None
    dtype: object
    """

    # The return annotation is kept on the nested function because it is
    # handed to ``transform_batch`` together with the function itself.
    def pad_zeros(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
        return s.str.zfill(width)

    batch_api = self._data.pandas_on_spark
    return batch_api.transform_batch(pad_zeros)
2335
+
2336
@no_type_check
def get_dummies(self, sep: str = "|") -> "ps.DataFrame":
    """
    Not supported.

    Raises
    ------
    NotImplementedError
        Always, regardless of the arguments given.
    """
    raise NotImplementedError
2342
+
2343
+
2344
def _test() -> None:
    """
    Run the doctests of ``pyspark.pandas.strings`` on a local Spark session.

    The working directory is switched to ``$SPARK_HOME`` first. The doctests
    run with the module namespace plus ``ps`` bound to ``pyspark.pandas``,
    using the ``ELLIPSIS`` and ``NORMALIZE_WHITESPACE`` options. The process
    exits with status -1 when at least one doctest fails.
    """
    import os
    import doctest
    import sys
    from pyspark.sql import SparkSession
    import pyspark.pandas.strings

    os.chdir(os.environ["SPARK_HOME"])

    doctest_globals = pyspark.pandas.strings.__dict__.copy()
    doctest_globals["ps"] = pyspark.pandas

    builder = SparkSession.builder.master("local[4]")
    session = builder.appName("pyspark.pandas.strings tests").getOrCreate()

    outcome = doctest.testmod(
        pyspark.pandas.strings,
        globs=doctest_globals,
        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
    )
    session.stop()

    # ``outcome`` is doctest's (failed, attempted) result tuple.
    if outcome.failed:
        sys.exit(-1)
2368
+
2369
+
2370
# Run this module's doctests when the file is executed directly as a script.
if __name__ == "__main__":
    _test()