snowpark-connect 0.20.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879) hide show
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1213 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ import importlib
19
+
20
+ import pandas as pd
21
+ import numpy as np
22
+ from pyspark.ml.feature import Bucketizer
23
+ from pyspark.mllib.stat import KernelDensity
24
+ from pyspark.sql import functions as F
25
+ from pandas.core.base import PandasObject
26
+ from pandas.core.dtypes.inference import is_integer
27
+
28
+ from pyspark.pandas.missing import unsupported_function
29
+ from pyspark.pandas.config import get_option
30
+ from pyspark.pandas.utils import name_like_string
31
+
32
+
33
class TopNPlotBase:
    """Mixin for plots that render only the leading rows of the input."""

    def get_top_n(self, data):
        """Return the leading rows of ``data`` as a pandas object.

        At most ``plotting.max_rows`` rows are returned. One extra row is
        fetched so truncation can be detected; the outcome is stored on
        ``self.partial``.

        Raises
        ------
        TypeError
            If ``data`` is neither a pandas-on-Spark DataFrame nor Series.
        """
        from pyspark.pandas import DataFrame, Series

        limit = get_option("plotting.max_rows")
        if not isinstance(data, (Series, DataFrame)):
            raise TypeError("Only DataFrame and Series are supported for plotting.")

        # For categorical variables this is likely reached through
        # df.x.value_counts().plot.xxx(); only the head is materialized.
        head = data.head(limit + 1)._to_pandas()

        self.partial = len(head) > limit
        return head.iloc[:limit] if self.partial else head

    def set_result_text(self, ax):
        """Annotate ``ax`` when ``get_top_n`` truncated the data."""
        limit = get_option("plotting.max_rows")
        assert hasattr(self, "partial")

        if not self.partial:
            return

        ax.text(
            1,
            1,
            "showing top {} elements only".format(limit),
            size=6,
            ha="right",
            va="bottom",
            transform=ax.transAxes,
        )
65
+
66
+
67
class SampledPlotBase:
    """Mixin for plots that render a random sample of the input."""

    def get_sampled(self, data):
        """Return a sampled copy of ``data`` as a pandas DataFrame.

        The sampling fraction is the ``plotting.sample_ratio`` option. When
        that option is ``None`` it is derived from ``plotting.max_rows`` and
        the number of rows, capped at 1.0. The fraction used is stored on
        ``self.fraction``.

        Raises
        ------
        TypeError
            If ``data`` is neither a pandas-on-Spark DataFrame nor Series.
        """
        from pyspark.pandas import DataFrame, Series

        # Validate the type before touching len(data), so unsupported inputs
        # always get the dedicated error message.
        if not isinstance(data, (DataFrame, Series)):
            raise TypeError("Only DataFrame and Series are supported for plotting.")

        fraction = get_option("plotting.sample_ratio")
        if fraction is None:
            row_count = len(data)
            if row_count == 0:
                # Nothing to sample; avoids dividing by zero below.
                fraction = 1.0
            else:
                fraction = min(1.0, 1 / (row_count / get_option("plotting.max_rows")))
        self.fraction = fraction

        if isinstance(data, Series):
            data = data.to_frame()
        sampled = data._internal.resolved_copy.spark_frame.sample(fraction=self.fraction)
        return DataFrame(data._internal.with_new_sdf(sampled))._to_pandas()

    def set_result_text(self, ax):
        """Annotate ``ax`` when the plotted data is a strict sample."""
        assert hasattr(self, "fraction")

        if self.fraction < 1:
            ax.text(
                1,
                1,
                "showing the sampled result by fraction %s" % self.fraction,
                size=6,
                ha="right",
                va="bottom",
                transform=ax.transAxes,
            )
98
+
99
+
100
class NumericPlotBase:
    """Shared helper for plots that only accept numeric columns."""

    @staticmethod
    def prepare_numeric_data(data):
        """Split ``data`` into the full frame and its numeric columns.

        A Series is first converted to a one-column frame.

        Returns
        -------
        tuple
            ``(frame, numeric_frame)``.

        Raises
        ------
        TypeError
            If no column has one of the accepted dtypes.
        """
        from pyspark.pandas.series import Series

        frame = data.to_frame() if isinstance(data, Series) else data

        accepted_dtypes = ["byte", "decimal", "integer", "float", "long", "double", np.datetime64]
        numeric_data = frame.select_dtypes(include=accepted_dtypes)

        # Plotting an empty selection is not allowed.
        if len(numeric_data.columns) == 0:
            kind = numeric_data.__class__.__name__
            raise TypeError("Empty {0!r}: no numeric data to plot".format(kind))

        return frame, numeric_data
119
+
120
+
121
class HistogramPlotBase(NumericPlotBase):
    """Static helpers that compute histogram bins and counts with Spark."""

    @staticmethod
    def prepare_hist_data(data, bins):
        """Return ``(numeric_data, bins)``, turning an integer ``bins`` into edges."""
        frame, numeric_data = NumericPlotBase.prepare_numeric_data(data)
        if is_integer(bins):
            # An integer is a bin count: derive the edges from the data range.
            bins = HistogramPlotBase.get_bins(frame._to_spark(), bins)
        return numeric_data, bins

    @staticmethod
    def get_bins(sdf, bins):
        """Return ``bins + 1`` evenly spaced edges covering all columns of ``sdf``.

        The range is the smallest minimum and largest maximum over the columns.
        When both coincide the range is widened by 0.5 on each side.
        """
        if len(sdf.columns) <= 1:
            only_column = sdf.columns[-1]
            lower_col = F.min(only_column)
            upper_col = F.max(only_column)
        else:
            lower_col = F.least(*map(F.min, sdf))
            upper_col = F.greatest(*map(F.max, sdf))

        lower, upper = sdf.select(lower_col, upper_col).first()

        if lower == upper:
            lower, upper = lower - 0.5, upper + 0.5

        return np.linspace(lower, upper, bins + 1)

    @staticmethod
    def compute_hist(psdf, bins):
        """Count, per column of ``psdf``, how many values fall in each bin.

        Parameters
        ----------
        psdf : pandas-on-Spark DataFrame
        bins : numpy array of bin edges

        Returns
        -------
        list of pandas.Series
            One series per input column, named after the column, with one
            count per bin (empty bins are filled with 0).
        """
        assert isinstance(bins, (np.ndarray, np.generic))

        internal = psdf._internal
        labels = internal.column_labels
        input_column_names = [name_like_string(label) for label in labels]
        sdf = internal.spark_frame.select(
            *[
                internal.spark_column_for(label).alias(name)
                for label, name in zip(labels, input_column_names)
            ]
        )

        # Step 1: bucketize every column and stack the results into a flat
        # (__group_id, __bucket) frame, where __group_id is the column position.
        colnames = sdf.columns
        bucket_names = ["__{}_bucket".format(colname) for colname in colnames]

        output_df = None
        for group_id, (colname, bucket_name) in enumerate(zip(colnames, bucket_names)):
            bucketizer = Bucketizer(
                splits=bins, inputCol=colname, outputCol=bucket_name, handleInvalid="skip"
            )
            flat = bucketizer.transform(sdf).select(
                F.lit(group_id).alias("__group_id"), F.col(bucket_name).alias("__bucket")
            )
            output_df = flat if output_df is None else output_df.union(flat)

        # Step 2: count rows per (group, bucket) and bring the result to pandas.
        counted = output_df.groupby("__group_id", "__bucket").agg(F.count("*").alias("count"))
        result = counted.toPandas().sort_values(by=["__group_id", "__bucket"])

        # Step 3: for each column, left-join its counts onto the full list of
        # bin indexes so that bins without any value show up as zero.
        output_series = []
        for group_id, input_column_name in enumerate(input_column_names):
            group_counts = result[result["__group_id"] == group_id]
            all_bins = pd.DataFrame({"__bucket": np.arange(0, len(bins) - 1)})
            merged = all_bins.merge(group_counts, how="left", on=["__bucket"])
            pdf = merged.fillna(0)[["count"]]
            pdf.columns = [input_column_name]
            output_series.append(pdf[input_column_name])

        return output_series
271
+
272
+
273
class BoxPlotBase:
    """Static helpers that compute box-plot statistics with Spark aggregations.

    Column names are wrapped in backticks wherever they are handed to Spark
    as a column reference.
    """

    @staticmethod
    def compute_multicol_stats(data, colnames, whis, precision):
        """Compute mean, quartiles and fences for several columns in one query.

        Parameters
        ----------
        data : object exposing ``_internal.resolved_copy.spark_frame``
        colnames : sequence of str
        whis : number
            Multiplier applied to the inter-quartile range to get the fences.
        precision : float
            ``int(1.0 / precision)`` is passed to ``percentile_approx``.

        Returns
        -------
        dict
            ``{colname: {"mean", "med", "q1", "q3", "lfence", "ufence"}}``.
        """
        scols = []
        for colname in colnames:
            quoted = "`%s`" % colname
            scols.append(
                F.percentile_approx(quoted, [0.25, 0.50, 0.75], int(1.0 / precision)).alias(
                    "{}_percentiles%".format(colname)
                )
            )
            scols.append(F.mean(quoted).alias("{}_mean".format(colname)))

        # Single row; per column: [q1, med, q3] followed by the mean, e.g.
        #   a_percentiles    a_mean  b_percentiles    b_mean
        #   [3.0, 3.2, 3.2]  3.18    [5.1, 5.9, 6.4]  5.86
        pdf = data._internal.resolved_copy.spark_frame.select(*scols).toPandas()

        multicol_stats = {}
        for position, colname in enumerate(colnames):
            q1, med, q3 = pdf.iloc[0, 2 * position]
            mean = pdf.iloc[0, 2 * position + 1]
            iqr = q3 - q1

            multicol_stats[colname] = {
                "mean": mean,
                "med": med,
                "q1": q1,
                "q3": q3,
                "lfence": q1 - whis * iqr,
                "ufence": q3 + whis * iqr,
            }

        return multicol_stats

    @staticmethod
    def compute_stats(data, colname, whis, precision):
        """Compute mean, quartiles and Tukey's fences for one column.

        Returns
        -------
        tuple
            ``(stats, (lfence, ufence))`` where ``stats`` has the keys
            ``"mean"``, ``"med"``, ``"q1"`` and ``"q3"``.
        """
        accuracy = int(1.0 / precision)
        percentile_cols = [
            F.expr("approx_percentile(`{}`, {}, {})".format(colname, q, accuracy)).alias(
                "{}_{}%".format(colname, int(q * 100))
            )
            for q in [0.25, 0.50, 0.75]
        ]
        pdf = data._psdf._internal.resolved_copy.spark_frame.agg(
            *percentile_cols,
            F.mean("`%s`" % colname).alias("{}_mean".format(colname)),
        ).toPandas()

        q1 = pdf["{}_25%".format(colname)].values[0]
        med = pdf["{}_50%".format(colname)].values[0]
        q3 = pdf["{}_75%".format(colname)].values[0]
        mean = pdf["{}_mean".format(colname)].values[0]

        # IQR and Tukey's fences.
        iqr = q3 - q1
        lfence = q1 - whis * iqr
        ufence = q3 + whis * iqr

        stats = {"mean": mean, "med": med, "q1": q1, "q3": q3}
        return stats, (lfence, ufence)

    @staticmethod
    def multicol_outliers(data, multicol_stats):
        """Add one boolean ``__<col>_outlier`` column per entry of ``multicol_stats``."""
        scols = {}
        for colname, stats in multicol_stats.items():
            scols["__{}_outlier".format(colname)] = ~F.col("`%s`" % colname).between(
                stats["lfence"], stats["ufence"]
            )
        return data._internal.resolved_copy.spark_frame.withColumns(scols)

    @staticmethod
    def outliers(data, colname, lfence, ufence):
        """Add a boolean ``__<col>_outlier`` column flagging values outside the fences."""
        within_fences = F.col("`%s`" % colname).between(lfence, ufence)
        return data._psdf._internal.resolved_copy.spark_frame.withColumn(
            "__{}_outlier".format(colname), ~within_fences
        )

    @staticmethod
    def calc_multicol_whiskers(colnames, multicol_outliers):
        """Compute the whiskers (min and max of the non-outliers) for each column.

        Returns
        -------
        dict
            ``{colname: {"min": ..., "max": ...}}``.
        """
        scols = []
        for colname in colnames:
            # Quote both references, as multicol_outliers does, so names
            # containing dots are not parsed as nested field access.
            is_inlier = ~F.col("`__{}_outlier`".format(colname))
            inlier_value = F.when(is_inlier, F.col("`%s`" % colname)).otherwise(F.lit(None))
            scols.append(F.min(inlier_value).alias("__{}_min".format(colname)))
            scols.append(F.max(inlier_value).alias("__{}_max".format(colname)))

        pdf = multicol_outliers.select(*scols).toPandas()

        whiskers = {}
        for position, colname in enumerate(colnames):
            whiskers[colname] = {
                "min": pdf.iloc[0, 2 * position],
                "max": pdf.iloc[0, 2 * position + 1],
            }

        return whiskers

    @staticmethod
    def calc_whiskers(colname, outliers):
        """Return ``[min, max]`` of the non-outlier values of ``colname``."""
        quoted = "`%s`" % colname
        minmax = (
            outliers.filter("not `__{}_outlier`".format(colname))
            # Both aggregates use the quoted name so they resolve the same column.
            .agg(F.min(quoted).alias("min"), F.max(quoted).alias("max"))
            .toPandas()
        )
        return minmax.iloc[0][["min", "max"]].values

    @staticmethod
    def get_fliers(colname, outliers, min_val):
        """Return up to 1001 outlier values of ``colname`` as a numpy array.

        Rows are ordered by ``abs(value - min_val)`` before the limit is applied.
        """
        fliers_df = outliers.filter("`__{}_outlier`".format(colname))

        # NOTE(review): the original comment describes taking the values with
        # the highest absolute distance, but orderBy sorts ascending here;
        # confirm which ordering is intended before changing it.
        value_col = F.col("`{}`".format(colname))
        order_col = F.abs(value_col - min_val.item())
        fliers = fliers_df.select(value_col).orderBy(order_col).limit(1001).toPandas()[colname].values

        return fliers
427
+
428
+
429
class KdePlotBase(NumericPlotBase):
    """Helpers shared by kernel-density (KDE) plot implementations."""

    @staticmethod
    def prepare_kde_data(data):
        """Return the second item produced by ``NumericPlotBase.prepare_numeric_data``."""
        _unused, numeric = NumericPlotBase.prepare_numeric_data(data)
        return numeric

    @staticmethod
    def get_ind(sdf, ind):
        """
        Resolve the evaluation points of the density estimate.

        ``ind`` is returned untouched unless it is ``None`` (1000 points) or an
        integer (that many points). In those two cases the points are evenly
        spaced over the data range of ``sdf`` widened by half the range on each side.
        """
        if ind is None:
            num_points = 1000
        elif is_integer(ind):
            num_points = ind
        else:
            # Caller supplied explicit evaluation points.
            return ind

        if len(sdf.columns) > 1:
            # Overall extremes across all columns of the frame.
            lowest = F.least(*map(F.min, sdf))
            highest = F.greatest(*map(F.max, sdf))
        else:
            only_column = sdf.columns[-1]
            lowest = F.min(only_column)
            highest = F.max(only_column)
        min_val, max_val = sdf.select(lowest, highest).first()

        margin = 0.5 * (max_val - min_val)
        return np.linspace(min_val - margin, max_val + margin, num_points)

    @staticmethod
    def compute_kde(sdf, bw_method=None, ind=None):
        """
        Estimate the density of the first column of ``sdf`` at the points ``ind``.

        ``bw_method`` defaults to ``None`` in the signature, but the assertion
        below rejects anything that is not an ``int`` or ``float`` whenever
        assertions are enabled.
        """
        # 'sdf' is a Spark DataFrame that selects one column.

        # Going through the RDD API is slow; a Dataset based implementation
        # could replace it once Spark provides one.
        values = sdf.rdd.map(lambda row: float(row[0]))
        estimator = KernelDensity()
        estimator.setSample(values)

        assert isinstance(bw_method, (int, float)), "'bw_method' must be set as a scalar number."

        if bw_method is not None:
            # Match the bandwidth with Spark.
            estimator.setBandwidth(float(bw_method))
        points = [float(point) for point in ind]
        return estimator.estimate(points)
480
+
481
+
482
class PandasOnSparkPlotAccessor(PandasObject):
    """
    Series/Frames plotting accessor and method.

    Uses the backend specified by the
    option ``plotting.backend``. By default, plotly is used.

    Plotting methods can also be accessed by calling the accessor as a method
    with the ``kind`` argument:
    ``s.plot(kind='hist')`` is equivalent to ``s.plot.hist()``
    """

    # Plot kind -> callable applied to the data before it is handed to a
    # backend's generic ``plot`` function (see ``__call__``). Kinds missing
    # from this map have no such preparation step.
    pandas_plot_data_map = {
        "pie": TopNPlotBase().get_top_n,
        "bar": TopNPlotBase().get_top_n,
        "barh": TopNPlotBase().get_top_n,
        "scatter": TopNPlotBase().get_top_n,
        "area": SampledPlotBase().get_sampled,
        "line": SampledPlotBase().get_sampled,
    }
    # Class-level cache: backend name -> already imported backend module.
    _backends = {}  # type: ignore[var-annotated]

    def __init__(self, data):
        # The object to plot; the plotting methods check it against
        # pyspark.pandas Series / DataFrame.
        self.data = data
506
+
507
@staticmethod
def _find_backend(backend):
    """
    Find a pandas-on-Spark plotting backend.

    Looks the name up in the class-level cache first, then tries to import a
    module of that name. An imported module is accepted, and cached, only if
    it exposes ``plot`` or ``plot_pandas_on_spark``.

    Raises
    ------
    ValueError
        If the module cannot be imported or exposes neither attribute.
    """
    registry = PandasOnSparkPlotAccessor._backends
    if backend in registry:
        return registry[backend]

    candidate = None
    try:
        candidate = importlib.import_module(backend)
    except ImportError:
        # Fall through to the ValueError raised at the end.
        pass

    # Validate that the interface is implemented when the option
    # is set, rather than at plot time.
    if candidate is not None and (
        hasattr(candidate, "plot") or hasattr(candidate, "plot_pandas_on_spark")
    ):
        registry[backend] = candidate
        return candidate

    raise ValueError(
        "Could not find plotting backend '{backend}'. Ensure that you've installed "
        "the package providing the '{backend}' entrypoint, or that the package has a "
        "top-level `.plot` method.".format(backend=backend)
    )
532
+
533
@staticmethod
def _get_plot_backend(backend=None):
    """
    Return the module implementing the requested plotting backend.

    A falsy ``backend`` falls back to the ``plotting.backend`` option. Resolved
    modules are stored in the class-level cache and reused on later calls.

    Raises
    ------
    ImportError
        If "matplotlib" or "plotly" is requested but cannot be imported.
    ValueError
        Propagated from ``_find_backend`` for any other unknown backend name.
    """
    name = backend or get_option("plotting.backend")
    registry = PandasOnSparkPlotAccessor._backends

    # Shortcut: already resolved once.
    if name in registry:
        return registry[name]

    if name == "matplotlib":
        # matplotlib is an optional dependency, so the import is attempted
        # here to surface a clear ImportError when it is missing.
        try:
            import matplotlib  # noqa: F401
            from pyspark.pandas.plot import matplotlib as resolved
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                "default backend 'matplotlib' is selected."
            ) from None
        registry["matplotlib"] = resolved
        return resolved

    if name == "plotly":
        # Same optional-dependency probe for plotly.
        try:
            import plotly  # noqa: F401
            from pyspark.pandas.plot import plotly as resolved
        except ImportError:
            raise ImportError(
                "plotly is required for plotting when the "
                "default backend 'plotly' is selected."
            ) from None
        registry["plotly"] = resolved
        return resolved

    # Any other name is treated as an importable third-party backend module.
    resolved = PandasOnSparkPlotAccessor._find_backend(name)
    registry[name] = resolved
    return resolved
570
+
571
def __call__(self, kind="line", backend=None, **kwargs):
    """
    Plot ``self.data`` as ``kind`` with the selected backend.

    Parameters
    ----------
    kind : str, default "line"
        Plot kind; ``"density"`` is treated as an alias of ``"kde"``.
    backend : str, optional
        Backend name handed to ``_get_plot_backend``.
    **kwargs
        Forwarded unchanged to the backend.

    Returns
    -------
    Whatever the backend's ``plot_pandas_on_spark`` or ``plot`` function returns.

    Raises
    ------
    NotImplementedError
        If the backend only offers the generic ``plot`` entry point and
        ``kind`` has no entry in ``pandas_plot_data_map``.
    """
    plot_backend = PandasOnSparkPlotAccessor._get_plot_backend(backend)
    plot_data = self.data

    kind = {"density": "kde"}.get(kind, kind)
    if hasattr(plot_backend, "plot_pandas_on_spark"):
        # use if there's pandas-on-Spark specific method.
        return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs)

    # fallback to use pandas'
    # ``.get`` instead of indexing: a kind missing from the map (for example
    # "hist", "box" or "kde") used to escape as a bare KeyError, which made the
    # NotImplementedError below unreachable.
    prepare = PandasOnSparkPlotAccessor.pandas_plot_data_map.get(kind)
    if not prepare:
        raise NotImplementedError(
            "'%s' plot is not supported with '%s' plot "
            "backend yet." % (kind, plot_backend.__name__)
        )
    plot_data = prepare(plot_data)
    return plot_backend.plot(plot_data, kind=kind, **kwargs)
588
+
589
def line(self, x=None, y=None, **kwargs):
    """
    Plot DataFrame/Series as lines.

    This function is useful to plot lines using Series's values
    as coordinates.

    Parameters
    ----------
    x : int or str, optional
        Columns to use for the horizontal axis.
        Either the location or the label of the columns to be used.
        By default, it will use the DataFrame indices.
    y : int, str, or list of them, optional
        The values to be plotted.
        Either the location or the label of the columns to be used.
        By default, it will use the remaining DataFrame numeric columns.
    **kwargs
        Keyword arguments to pass on to :meth:`Series.plot` or :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    See Also
    --------
    plotly.express.line : Plot y versus x as lines and/or markers (plotly).
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers (matplotlib).

    Examples
    --------
    Basic plot.

    For Series:

    .. plotly::

        >>> s = ps.Series([1, 3, 2])
        >>> s.plot.line()  # doctest: +SKIP

    For DataFrame:

    .. plotly::

        The following example shows the populations for some animals
        over the years.

        >>> df = ps.DataFrame({'pig': [20, 18, 489, 675, 1776],
        ...                    'horse': [4, 25, 281, 600, 1900]},
        ...                   index=[1990, 1997, 2003, 2009, 2014])
        >>> df.plot.line()  # doctest: +SKIP

    .. plotly::

        The following example shows the relationship between both
        populations.

        >>> df = ps.DataFrame({'pig': [20, 18, 489, 675, 1776],
        ...                    'horse': [4, 25, 281, 600, 1900]},
        ...                   index=[1990, 1997, 2003, 2009, 2014])
        >>> df.plot.line(x='pig', y='horse')  # doctest: +SKIP
    """
    # Delegates to __call__, which resolves the backend and dispatches on kind.
    return self(kind="line", x=x, y=y, **kwargs)
654
+
655
def bar(self, x=None, y=None, **kwds):
    """
    Vertical bar plot.

    Parameters
    ----------
    x : label or position, optional
        Allows plotting of one column versus another.
        If not specified, the index of the DataFrame is used.
    y : label or position, optional
        Allows plotting of one column versus another.
        If not specified, all numerical columns are used.
    **kwds : optional
        Additional keyword arguments are documented in
        :meth:`pyspark.pandas.Series.plot` or
        :meth:`pyspark.pandas.DataFrame.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    Examples
    --------
    Basic plot.

    For Series:

    .. plotly::

        >>> s = ps.Series([1, 3, 2])
        >>> s.plot.bar()  # doctest: +SKIP

    For DataFrame:

    .. plotly::

        >>> df = ps.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> df.plot.bar(x='lab', y='val')  # doctest: +SKIP

    Plot a whole dataframe to a bar plot. Each column is stacked with a
    distinct color along the horizontal axis.

    .. plotly::

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> df.plot.bar()  # doctest: +SKIP

    Instead of stacking, the figure can be split by column with plotly
    APIs.

    .. plotly::

        >>> from plotly.subplots import make_subplots
        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> fig = (make_subplots(rows=2, cols=1)
        ...        .add_trace(df.plot.bar(y='speed').data[0], row=1, col=1)
        ...        .add_trace(df.plot.bar(y='lifespan').data[0], row=2, col=1))
        >>> fig  # doctest: +SKIP

    Plot a single column.

    .. plotly::

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> df.plot.bar(y='speed')  # doctest: +SKIP

    Plot only selected categories for the DataFrame.

    .. plotly::

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> df.plot.bar(x='lifespan')  # doctest: +SKIP
    """
    from pyspark.pandas import DataFrame, Series

    target = self.data
    # A Series has no columns to choose from, so x and y are not forwarded.
    if isinstance(target, Series):
        return self(kind="bar", **kwds)
    if isinstance(target, DataFrame):
        return self(kind="bar", x=x, y=y, **kwds)
    # Any other kind of data yields no plot.
    return None
757
+
758
def barh(self, x=None, y=None, **kwargs):
    """
    Make a horizontal bar plot.

    A horizontal bar plot is a plot that presents quantitative data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.

    Parameters
    ----------
    x : label or position, default DataFrame.index
        Column to be used for categories.
    y : label or position, default All numeric columns in dataframe
        Columns to be plotted from the DataFrame.
    **kwargs
        Keyword arguments to pass on to
        :meth:`pyspark.pandas.DataFrame.plot` or :meth:`pyspark.pandas.Series.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    See Also
    --------
    plotly.express.bar : Plot a vertical bar plot using plotly.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    For Series:

    .. plotly::

        >>> df = ps.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> df.val.plot.barh()  # doctest: +SKIP

    For DataFrame:

    .. plotly::

        >>> df = ps.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> df.plot.barh(x='lab', y='val')  # doctest: +SKIP

    Plot a whole DataFrame to a horizontal bar plot

    .. plotly::

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> df.plot.barh()  # doctest: +SKIP

    Plot a column of the DataFrame to a horizontal bar plot

    .. plotly::

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> df.plot.barh(y='speed')  # doctest: +SKIP

    Plot DataFrame versus the desired column

    .. plotly::

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = ps.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> df.plot.barh(x='lifespan')  # doctest: +SKIP
    """
    from pyspark.pandas import DataFrame, Series

    target = self.data
    # A Series has no columns to choose from, so x and y are not forwarded.
    if isinstance(target, Series):
        return self(kind="barh", **kwargs)
    if isinstance(target, DataFrame):
        return self(kind="barh", x=x, y=y, **kwargs)
    # Any other kind of data yields no plot.
    return None
847
+
848
def box(self, **kwds):
    """
    Make a box plot of the Series columns.

    Parameters
    ----------
    **kwds : optional
        Additional keyword arguments are documented in
        :meth:`pyspark.pandas.Series.plot`.

    precision: scalar, default = 0.01
        This argument is used by pandas-on-Spark to compute approximate statistics
        for building a boxplot. Use *smaller* values to get more precise
        statistics (matplotlib-only).

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    Notes
    -----
    There are behavior differences between pandas-on-Spark and pandas.

    * pandas-on-Spark computes approximate statistics - expect differences between
      pandas and pandas-on-Spark boxplots, especially regarding 1st and 3rd quartiles.
    * The `whis` argument is only supported as a single number.
    * pandas-on-Spark doesn't support the following argument(s) (matplotlib-only).

      * `bootstrap` argument is not supported
      * `autorange` argument is not supported

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    For Series:

    .. plotly::

        >>> data = np.random.randn(25, 4)
        >>> df = ps.DataFrame(data, columns=list('ABCD'))
        >>> df['A'].plot.box()  # doctest: +SKIP
    """
    from pyspark.pandas import DataFrame, Series

    # NOTE(review): the previous docstring ended with "This is an unsupported
    # function for DataFrame type", yet DataFrame input is forwarded here just
    # like Series - confirm what the backends actually support.
    if not isinstance(self.data, (Series, DataFrame)):
        return None
    return self(kind="box", **kwds)
900
+
901
def hist(self, bins=10, **kwds):
    """
    Draw one histogram of the DataFrame's columns.
    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`plotting.backend.plot`,
    on each series in the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    bins : integer or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, it gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins are returned unmodified.
    **kwds
        All other plotting keyword arguments to be passed to
        plotting backend.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    Examples
    --------
    Basic plot.

    For Series:

    .. plotly::

        >>> s = ps.Series([1, 3, 2])
        >>> s.plot.hist()  # doctest: +SKIP

    For DataFrame:

    .. plotly::

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns=['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> df = ps.from_pandas(df)
        >>> df.plot.hist(bins=12, alpha=0.5)  # doctest: +SKIP
    """
    # Delegates to __call__, which resolves the backend and dispatches on kind.
    return self(kind="hist", bins=bins, **kwds)
950
+
951
def kde(self, bw_method=None, ind=None, **kwargs):
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    Parameters
    ----------
    bw_method : scalar
        The method used to calculate the estimator bandwidth.
        See KernelDensity in PySpark for more information.
        The signature defaults it to None, but ``KdePlotBase.compute_kde``
        asserts that it is an int or float.
    ind : NumPy array or integer, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs : optional
        Keyword arguments to pass on to :meth:`pandas-on-Spark.Series.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    Examples
    --------
    A scalar bandwidth should be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plotly::

        >>> s = ps.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> s.plot.kde(bw_method=0.3)  # doctest: +SKIP

    .. plotly::

        >>> s = ps.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> s.plot.kde(bw_method=3)  # doctest: +SKIP

    The `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plotly::

        >>> s = ps.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> s.plot.kde(ind=[1, 2, 3, 4, 5], bw_method=0.3)  # doctest: +SKIP

    For DataFrame, it works in the same way as Series:

    .. plotly::

        >>> df = ps.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> df.plot.kde(bw_method=0.3)  # doctest: +SKIP

    .. plotly::

        >>> df = ps.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> df.plot.kde(bw_method=3)  # doctest: +SKIP

    .. plotly::

        >>> df = ps.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> df.plot.kde(ind=[1, 2, 3, 4, 5, 6], bw_method=0.3)  # doctest: +SKIP
    """
    # Delegates to __call__, which resolves the backend and dispatches on kind.
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
1025
+
1026
+ density = kde
1027
+
1028
def area(self, x=None, y=None, **kwds):
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the plotly area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default it uses the index.
    y : label or position, optional
        Column to plot. By default it uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot (matplotlib-only).
    **kwds : optional
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    Examples
    --------

    For Series

    .. plotly::

        >>> df = ps.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> df.sales.plot.area()  # doctest: +SKIP

    For DataFrame

    .. plotly::

        >>> df = ps.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> df.plot.area()  # doctest: +SKIP
    """
    from pyspark.pandas import DataFrame, Series

    target = self.data
    # A Series has no columns to choose from, so x and y are not forwarded.
    if isinstance(target, Series):
        return self(kind="area", **kwds)
    if isinstance(target, DataFrame):
        return self(kind="area", x=x, y=y, **kwds)
    # Any other kind of data yields no plot.
    return None
1087
+
1088
def pie(self, **kwds):
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`plotly.express.pie` for the
    specified column.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed (matplotlib-only).
    **kwds
        Keyword arguments to pass on to :meth:`pandas-on-Spark.Series.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    Raises
    ------
    ValueError
        For a DataFrame when neither ``y`` nor a truthy ``subplots`` is given.

    Examples
    --------

    For Series:

    .. plotly::

        >>> df = ps.DataFrame({'mass': [0.330, 4.87, 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> df.mass.plot.pie()  # doctest: +SKIP


    For DataFrame:

    .. plotly::

        >>> df = ps.DataFrame({'mass': [0.330, 4.87, 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> df.plot.pie(y='mass')  # doctest: +SKIP
    """
    from pyspark.pandas import DataFrame, Series

    if not isinstance(self.data, Series):
        # pandas will raise an error if y is None and subplots is not True
        y_missing = kwds.get("y", None) is None
        subplots_flag = kwds.get("subplots", False)
        if isinstance(self.data, DataFrame) and y_missing and not subplots_flag:
            raise ValueError(
                "pie requires either y column or 'subplots=True' (matplotlib-only)"
            )
    return self(kind="pie", **kwds)
1147
+
1148
def scatter(self, x, y, **kwds):
    """
    Create a scatter plot with varying marker point size and color.

    The coordinates of each point are defined by two dataframe columns and
    filled circles are used to represent each point. This kind of plot is
    useful to see complex correlations between two variables. Points could
    be for instance natural 2D coordinates like longitude and latitude in
    a map or, in general, any pair of metrics that can be plotted against
    each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : scalar or array_like, optional
        (matplotlib-only).
    c : str, int or array_like, optional
        (matplotlib-only).

    **kwds: Optional
        Keyword arguments to pass on to :meth:`pyspark.pandas.DataFrame.plot`.

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Return a custom object when ``backend!=plotly``.
        Return an ndarray when ``subplots=True`` (matplotlib-only).

    See Also
    --------
    plotly.express.scatter : Scatter plot using multiple input data
        formats (plotly).
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats (matplotlib).

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plotly::

        >>> df = ps.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> df.plot.scatter(x='length', y='width')  # doctest: +SKIP

    And now with dark scheme:

    .. plotly::

        >>> df = ps.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> fig = df.plot.scatter(x='length', y='width')
        >>> fig.update_layout(template="plotly_dark")  # doctest: +SKIP
    """
    # Delegates to __call__, which resolves the backend and dispatches on kind.
    return self(kind="scatter", x=x, y=y, **kwds)
1211
+
1212
def hexbin(self, **kwds):
    """
    Hexbin plots are not provided by this accessor.

    The keyword arguments are accepted but not used. The method builds a
    callable with ``unsupported_function`` for ``pd.DataFrame.hexbin`` and
    invokes it immediately.
    """
    # NOTE(review): presumably the returned callable raises to report that
    # hexbin is unavailable - confirm against unsupported_function.
    return unsupported_function(class_name="pd.DataFrame", method_name="hexbin")()