snowpark-connect 0.20.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of snowpark-connect might be problematic. Click here for more details.

Files changed (879):
  1. snowflake/snowpark_connect/__init__.py +23 -0
  2. snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
  3. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
  4. snowflake/snowpark_connect/column_name_handler.py +735 -0
  5. snowflake/snowpark_connect/config.py +576 -0
  6. snowflake/snowpark_connect/constants.py +47 -0
  7. snowflake/snowpark_connect/control_server.py +52 -0
  8. snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
  9. snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
  10. snowflake/snowpark_connect/empty_dataframe.py +18 -0
  11. snowflake/snowpark_connect/error/__init__.py +11 -0
  12. snowflake/snowpark_connect/error/error_mapping.py +6174 -0
  13. snowflake/snowpark_connect/error/error_utils.py +321 -0
  14. snowflake/snowpark_connect/error/exceptions.py +24 -0
  15. snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
  16. snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
  17. snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
  18. snowflake/snowpark_connect/execute_plan/utils.py +183 -0
  19. snowflake/snowpark_connect/expression/__init__.py +3 -0
  20. snowflake/snowpark_connect/expression/literal.py +90 -0
  21. snowflake/snowpark_connect/expression/map_cast.py +343 -0
  22. snowflake/snowpark_connect/expression/map_expression.py +293 -0
  23. snowflake/snowpark_connect/expression/map_extension.py +104 -0
  24. snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
  25. snowflake/snowpark_connect/expression/map_udf.py +142 -0
  26. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
  27. snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
  28. snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
  29. snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
  30. snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
  31. snowflake/snowpark_connect/expression/map_window_function.py +258 -0
  32. snowflake/snowpark_connect/expression/typer.py +125 -0
  33. snowflake/snowpark_connect/includes/__init__.py +0 -0
  34. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  35. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  36. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/python/__init__.py +21 -0
  99. snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
  100. snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
  101. snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
  102. snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
  103. snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
  104. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
  105. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
  106. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
  107. snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
  108. snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
  109. snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
  110. snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
  111. snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
  112. snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
  113. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
  114. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
  115. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
  116. snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
  117. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
  118. snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
  119. snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
  120. snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
  121. snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
  122. snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
  123. snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
  124. snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
  125. snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
  126. snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
  127. snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
  128. snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
  129. snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
  130. snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
  131. snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
  132. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
  133. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
  134. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
  135. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
  136. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
  137. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
  138. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
  139. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
  140. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
  141. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
  142. snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
  143. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
  144. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
  145. snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
  146. snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
  147. snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
  148. snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
  149. snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
  150. snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
  151. snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
  152. snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
  153. snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
  154. snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
  155. snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
  156. snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
  157. snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
  158. snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
  159. snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
  160. snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
  161. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
  162. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
  163. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
  164. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
  165. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
  166. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
  167. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
  168. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
  169. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
  170. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
  171. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
  172. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
  173. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
  174. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
  175. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
  176. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
  177. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
  178. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
  179. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
  180. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
  181. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
  182. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
  183. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
  184. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
  185. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
  186. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
  187. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
  188. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
  189. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
  190. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
  191. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
  192. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
  193. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
  194. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
  195. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
  196. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
  197. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
  198. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
  199. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
  200. snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
  201. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
  202. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
  203. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
  204. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
  205. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
  206. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
  207. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
  208. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
  209. snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
  210. snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
  211. snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
  212. snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
  213. snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
  214. snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
  215. snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
  216. snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
  217. snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
  218. snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
  219. snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
  220. snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
  221. snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
  222. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
  223. snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
  224. snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
  225. snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
  226. snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
  227. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
  228. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
  229. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
  230. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
  231. snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
  232. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
  233. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
  234. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
  235. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
  236. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
  237. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
  238. snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
  239. snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
  240. snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
  241. snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
  242. snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
  243. snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
  244. snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
  245. snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
  246. snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
  247. snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
  248. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
  249. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
  250. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
  251. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
  252. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
  253. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
  254. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
  255. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
  256. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
  257. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
  258. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
  259. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
  260. snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
  261. snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
  262. snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
  263. snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
  264. snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
  265. snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
  266. snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
  267. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
  268. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
  269. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
  270. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
  271. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
  272. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
  273. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
  274. snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
  275. snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
  276. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
  277. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
  278. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
  279. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
  280. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
  281. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
  282. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
  283. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
  284. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
  285. snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
  286. snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
  287. snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
  288. snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
  289. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
  290. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
  291. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
  292. snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
  293. snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
  294. snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
  295. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
  296. snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
  297. snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
  298. snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
  299. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
  300. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
  301. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
  302. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
  303. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
  304. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
  305. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
  306. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
  307. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
  308. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
  309. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
  310. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
  311. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
  312. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
  313. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
  314. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
  315. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
  316. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
  317. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
  318. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
  319. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
  320. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
  321. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
  322. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
  323. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
  324. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
  325. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
  326. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
  327. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
  328. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
  329. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
  330. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
  331. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
  332. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
  333. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
  334. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
  335. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
  336. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
  337. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
  338. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
  339. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
  340. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
  341. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
  342. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
  343. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
  344. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
  345. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
  346. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
  347. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
  348. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
  349. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
  350. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
  351. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
  352. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
  353. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
  354. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
  355. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
  356. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
  357. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
  358. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
  359. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
  360. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
  361. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
  362. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
  363. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
  364. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
  365. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
  366. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
  367. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
  368. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
  369. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
  370. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
  371. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
  372. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
  373. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
  374. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
  375. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
  376. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
  377. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
  378. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
  379. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
  380. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
  381. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
  382. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
  383. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
  384. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
  385. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
  386. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
  387. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
  388. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
  389. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
  390. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
  391. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
  392. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
  393. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
  394. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
  395. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
  396. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
  397. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
  398. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
  399. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
  400. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
  401. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
  402. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
  403. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
  404. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
  405. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
  406. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
  407. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
  408. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
  409. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
  410. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
  411. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
  412. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
  413. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
  414. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
  415. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
  416. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
  417. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
  418. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
  419. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
  420. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
  421. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
  422. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
  423. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
  424. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
  425. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
  426. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
  427. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
  428. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
  429. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
  430. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
  431. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
  432. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
  433. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
  434. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
  435. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
  436. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
  437. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
  438. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
  439. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
  440. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
  441. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
  442. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
  443. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
  444. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
  445. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
  446. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
  447. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
  448. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
  449. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
  450. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
  451. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
  452. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
  453. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
  454. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
  455. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
  456. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
  457. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
  458. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
  459. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
  460. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
  461. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
  462. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
  463. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
  464. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
  465. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
  466. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
  467. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
  468. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
  469. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
  470. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
  471. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
  472. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
  473. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
  474. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
  475. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
  476. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
  477. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
  478. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
  479. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
  480. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
  481. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
  482. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
  483. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
  484. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
  485. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
  486. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
  487. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
  488. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
  489. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
  490. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
  491. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
  492. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
  493. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
  494. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
  495. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
  496. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
  497. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
  498. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
  499. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
  500. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
  501. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
  502. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
  503. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
  504. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
  505. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
  506. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
  507. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
  508. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
  509. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
  510. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
  511. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
  512. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
  513. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
  514. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
  515. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
  516. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
  517. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
  518. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
  519. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
  520. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
  521. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
  522. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
  523. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
  524. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
  525. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
  526. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
  527. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
  528. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
  529. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
  530. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
  531. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
  532. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
  533. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
  534. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
  535. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
  536. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
  537. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
  538. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
  539. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
  540. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
  541. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
  542. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
  543. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
  544. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
  545. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
  546. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
  547. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
  548. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
  549. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
  550. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
  551. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
  552. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
  553. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
  554. snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
  555. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
  556. snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
  557. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
  558. snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
  559. snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
  560. snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
  561. snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
  562. snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
  563. snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
  564. snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
  565. snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
  566. snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
  567. snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
  568. snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
  569. snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
  570. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
  571. snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
  572. snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
  573. snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
  574. snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
  575. snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
  576. snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
  577. snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
  578. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
  579. snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
  580. snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
  581. snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
  582. snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
  583. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
  584. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
  585. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
  586. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
  587. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
  588. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
  589. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
  590. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
  591. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
  592. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
  593. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
  594. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
  595. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
  596. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
  597. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
  598. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
  599. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
  600. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
  601. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
  602. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
  603. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
  604. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
  605. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
  606. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
  607. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
  608. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
  609. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
  610. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
  611. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
  612. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
  613. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
  614. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
  615. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
  616. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
  617. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
  618. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
  619. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
  620. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
  621. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
  622. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
  623. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
  624. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
  625. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
  626. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
  627. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
  628. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
  629. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
  630. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
  631. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
  632. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
  633. snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
  634. snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
  635. snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
  636. snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
  637. snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
  638. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
  639. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
  640. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
  641. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
  642. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
  643. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
  644. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
  645. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
  646. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
  647. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
  648. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
  649. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
  650. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
  651. snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
  652. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
  653. snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
  654. snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
  655. snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
  656. snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
  657. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
  658. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
  659. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
  660. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
  661. snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
  662. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
  663. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
  664. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
  665. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
  666. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
  667. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
  668. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
  669. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
  670. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
  671. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
  672. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
  673. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
  674. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
  675. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
  676. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
  677. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
  678. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
  679. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
  680. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
  681. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
  682. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
  683. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
  684. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
  685. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
  686. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
  687. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
  688. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
  689. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
  690. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
  691. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
  692. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
  693. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
  694. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
  695. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
  696. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
  697. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
  698. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
  699. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
  700. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
  701. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
  702. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
  703. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
  704. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
  705. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
  706. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
  707. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
  708. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
  709. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
  710. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
  711. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
  712. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
  713. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
  714. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
  715. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
  716. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
  717. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
  718. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
  719. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
  720. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
  721. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
  722. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
  723. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
  724. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
  725. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
  726. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
  727. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
  728. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
  729. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
  730. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
  731. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
  732. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
  733. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
  734. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
  735. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
  736. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
  737. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
  738. snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
  739. snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
  740. snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
  741. snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
  742. snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
  743. snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
  744. snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
  745. snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
  746. snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
  747. snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
  748. snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
  749. snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
  750. snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
  751. snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
  752. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
  753. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
  754. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
  755. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
  756. snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
  757. snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
  758. snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
  759. snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
  760. snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
  761. snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
  762. snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
  763. snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
  764. snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
  765. snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
  766. snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
  767. snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
  768. snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
  769. snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
  770. snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
  771. snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
  772. snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
  773. snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
  774. snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
  775. snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
  776. snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
  777. snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
  778. snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
  779. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
  780. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
  781. snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
  782. snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
  783. snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
  784. snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
  785. snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
  786. snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
  787. snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
  788. snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
  789. snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
  790. snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
  791. snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
  792. snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
  793. snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
  794. snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
  795. snowflake/snowpark_connect/proto/__init__.py +10 -0
  796. snowflake/snowpark_connect/proto/control_pb2.py +35 -0
  797. snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
  798. snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
  799. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
  800. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
  801. snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
  802. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
  803. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
  804. snowflake/snowpark_connect/relation/__init__.py +3 -0
  805. snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
  806. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
  807. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
  808. snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
  809. snowflake/snowpark_connect/relation/io_utils.py +76 -0
  810. snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
  811. snowflake/snowpark_connect/relation/map_catalog.py +151 -0
  812. snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
  813. snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
  814. snowflake/snowpark_connect/relation/map_extension.py +412 -0
  815. snowflake/snowpark_connect/relation/map_join.py +341 -0
  816. snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
  817. snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
  818. snowflake/snowpark_connect/relation/map_relation.py +253 -0
  819. snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
  820. snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
  821. snowflake/snowpark_connect/relation/map_show_string.py +50 -0
  822. snowflake/snowpark_connect/relation/map_sql.py +1874 -0
  823. snowflake/snowpark_connect/relation/map_stats.py +324 -0
  824. snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
  825. snowflake/snowpark_connect/relation/map_udtf.py +288 -0
  826. snowflake/snowpark_connect/relation/read/__init__.py +7 -0
  827. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
  828. snowflake/snowpark_connect/relation/read/map_read.py +367 -0
  829. snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
  830. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
  831. snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
  832. snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
  833. snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
  834. snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
  835. snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
  836. snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
  837. snowflake/snowpark_connect/relation/read/utils.py +155 -0
  838. snowflake/snowpark_connect/relation/stage_locator.py +161 -0
  839. snowflake/snowpark_connect/relation/utils.py +219 -0
  840. snowflake/snowpark_connect/relation/write/__init__.py +3 -0
  841. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
  842. snowflake/snowpark_connect/relation/write/map_write.py +436 -0
  843. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
  844. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  845. snowflake/snowpark_connect/resources_initializer.py +75 -0
  846. snowflake/snowpark_connect/server.py +1136 -0
  847. snowflake/snowpark_connect/start_server.py +32 -0
  848. snowflake/snowpark_connect/tcm.py +8 -0
  849. snowflake/snowpark_connect/type_mapping.py +1003 -0
  850. snowflake/snowpark_connect/typed_column.py +94 -0
  851. snowflake/snowpark_connect/utils/__init__.py +3 -0
  852. snowflake/snowpark_connect/utils/artifacts.py +48 -0
  853. snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
  854. snowflake/snowpark_connect/utils/cache.py +84 -0
  855. snowflake/snowpark_connect/utils/concurrent.py +124 -0
  856. snowflake/snowpark_connect/utils/context.py +390 -0
  857. snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
  858. snowflake/snowpark_connect/utils/interrupt.py +85 -0
  859. snowflake/snowpark_connect/utils/io_utils.py +35 -0
  860. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
  861. snowflake/snowpark_connect/utils/profiling.py +47 -0
  862. snowflake/snowpark_connect/utils/session.py +180 -0
  863. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
  864. snowflake/snowpark_connect/utils/telemetry.py +513 -0
  865. snowflake/snowpark_connect/utils/udf_cache.py +392 -0
  866. snowflake/snowpark_connect/utils/udf_helper.py +328 -0
  867. snowflake/snowpark_connect/utils/udf_utils.py +310 -0
  868. snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
  869. snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
  870. snowflake/snowpark_connect/utils/xxhash64.py +247 -0
  871. snowflake/snowpark_connect/version.py +6 -0
  872. snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
  873. snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
  874. snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
  875. snowpark_connect-0.20.2.dist-info/METADATA +37 -0
  876. snowpark_connect-0.20.2.dist-info/RECORD +879 -0
  877. snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
  878. snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
  879. snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1166 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one or more
3
+ # contributor license agreements. See the NOTICE file distributed with
4
+ # this work for additional information regarding copyright ownership.
5
+ # The ASF licenses this file to You under the Apache License, Version 2.0
6
+ # (the "License"); you may not use this file except in compliance with
7
+ # the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ import sys
19
+ from abc import abstractmethod, ABCMeta
20
+
21
+ from typing import Any, Dict, Optional, TYPE_CHECKING
22
+
23
+ from pyspark import since, keyword_only
24
+ from pyspark.ml.wrapper import JavaParams
25
+ from pyspark.ml.param import Param, Params, TypeConverters
26
+ from pyspark.ml.param.shared import (
27
+ HasLabelCol,
28
+ HasPredictionCol,
29
+ HasProbabilityCol,
30
+ HasRawPredictionCol,
31
+ HasFeaturesCol,
32
+ HasWeightCol,
33
+ )
34
+ from pyspark.ml.common import inherit_doc
35
+ from pyspark.ml.util import JavaMLReadable, JavaMLWritable
36
+ from pyspark.sql.dataframe import DataFrame
37
+
38
+ if TYPE_CHECKING:
39
+ from pyspark.ml._typing import (
40
+ ParamMap,
41
+ BinaryClassificationEvaluatorMetricType,
42
+ ClusteringEvaluatorDistanceMeasureType,
43
+ ClusteringEvaluatorMetricType,
44
+ MulticlassClassificationEvaluatorMetricType,
45
+ MultilabelClassificationEvaluatorMetricType,
46
+ RankingEvaluatorMetricType,
47
+ RegressionEvaluatorMetricType,
48
+ )
49
+
50
+
51
# Public names exported by a star-import of this module.
# JavaEvaluator, defined further down in this file, is not listed here.
__all__ = [
    "Evaluator",
    "BinaryClassificationEvaluator",
    "RegressionEvaluator",
    "MulticlassClassificationEvaluator",
    "MultilabelClassificationEvaluator",
    "ClusteringEvaluator",
    "RankingEvaluator",
]
60
+
61
+
62
@inherit_doc
class Evaluator(Params, metaclass=ABCMeta):
    """
    Abstract base for evaluators, which reduce a dataset of predictions to a
    single metric value.

    .. versionadded:: 1.4.0
    """

    @abstractmethod
    def _evaluate(self, dataset: DataFrame) -> float:
        """
        Computes the metric for ``dataset``. Concrete evaluators must override
        this hook; the base implementation only raises.

        Parameters
        ----------
        dataset : :py:class:`pyspark.sql.DataFrame`
            a dataset that contains labels/observations and predictions

        Returns
        -------
        float
            metric
        """
        raise NotImplementedError()

    def evaluate(self, dataset: DataFrame, params: Optional["ParamMap"] = None) -> float:
        """
        Computes the metric for ``dataset``, optionally overriding params for
        this one call.

        .. versionadded:: 1.4.0

        Parameters
        ----------
        dataset : :py:class:`pyspark.sql.DataFrame`
            a dataset that contains labels/observations and predictions
        params : dict, optional
            an optional param map that overrides embedded params

        Returns
        -------
        float
            metric

        Raises
        ------
        TypeError
            if ``params`` is neither ``None`` nor a ``dict``
        """
        overrides = dict() if params is None else params
        if not isinstance(overrides, dict):
            raise TypeError("Params must be a param map but got %s." % type(overrides))
        # With nothing to override, evaluate on this instance directly;
        # otherwise evaluate on a copy that carries the overrides.
        target = self.copy(overrides) if overrides else self
        return target._evaluate(dataset)

    @since("1.5.0")
    def isLargerBetter(self) -> bool:
        """
        Tells whether a larger value returned by :py:meth:`evaluate` is better
        (True, the default here) or a smaller one is (False).
        An evaluator may support several metrics, some to be maximized and
        some to be minimized.
        """
        return True
123
+
124
+
125
@inherit_doc
class JavaEvaluator(JavaParams, Evaluator, metaclass=ABCMeta):
    """
    Common base for :py:class:`Evaluator` subclasses that delegate to a
    Java/Scala implementation.
    """

    def _evaluate(self, dataset: DataFrame) -> float:
        """
        Computes the metric by delegating to the wrapped Java object.

        Parameters
        ----------
        dataset : :py:class:`pyspark.sql.DataFrame`
            a dataset that contains labels/observations and predictions

        Returns
        -------
        float
            evaluation metric
        """
        self._transfer_params_to_java()
        # Read the handle only after the transfer call, as the original did.
        java_evaluator = self._java_obj
        assert java_evaluator is not None
        return java_evaluator.evaluate(dataset._jdf)

    # Intentionally left without a docstring of its own.
    def isLargerBetter(self) -> bool:
        self._transfer_params_to_java()
        java_evaluator = self._java_obj
        assert java_evaluator is not None
        return java_evaluator.isLargerBetter()
154
+
155
+
156
@inherit_doc
class BinaryClassificationEvaluator(
    JavaEvaluator,
    HasLabelCol,
    HasRawPredictionCol,
    HasWeightCol,
    JavaMLReadable["BinaryClassificationEvaluator"],
    JavaMLWritable,
):
    """
    Evaluator for binary classification, which expects input columns rawPrediction, label
    and an optional weight column.
    The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label
    1) or of type vector (length-2 vector of raw predictions, scores, or label probabilities).

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> from pyspark.ml.linalg import Vectors
    >>> scoreAndLabels = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1]),
    ...    [(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)])
    >>> dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])
    ...
    >>> evaluator = BinaryClassificationEvaluator()
    >>> evaluator.setRawPredictionCol("raw")
    BinaryClassificationEvaluator...
    >>> evaluator.evaluate(dataset)
    0.70...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
    0.83...
    >>> bce_path = temp_path + "/bce"
    >>> evaluator.save(bce_path)
    >>> evaluator2 = BinaryClassificationEvaluator.load(bce_path)
    >>> str(evaluator2.getRawPredictionCol())
    'raw'
    >>> scoreAndLabelsAndWeight = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1], x[2]),
    ...    [(0.1, 0.0, 1.0), (0.1, 1.0, 0.9), (0.4, 0.0, 0.7), (0.6, 0.0, 0.9),
    ...     (0.6, 1.0, 1.0), (0.6, 1.0, 0.3), (0.8, 1.0, 1.0)])
    >>> dataset = spark.createDataFrame(scoreAndLabelsAndWeight, ["raw", "label", "weight"])
    ...
    >>> evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw", weightCol="weight")
    >>> evaluator.evaluate(dataset)
    0.70...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
    0.82...
    >>> evaluator.getNumBins()
    1000
    """

    # Which area metric to report; the help text lists the accepted names.
    metricName: Param["BinaryClassificationEvaluatorMetricType"] = Param(
        Params._dummy(),
        "metricName",
        "metric name in evaluation (areaUnderROC|areaUnderPR)",
        typeConverter=TypeConverters.toString,  # type: ignore[arg-type]
    )

    # Down-sampling control for the curves used in the area computation.
    numBins: Param[int] = Param(
        Params._dummy(),
        "numBins",
        "Number of bins to down-sample the curves "
        "(ROC curve, PR curve) in area computation. If 0, no down-sampling will "
        "occur. Must be >= 0.",
        typeConverter=TypeConverters.toInt,
    )

    # Read by __init__ and setParams below; presumably populated by the
    # @keyword_only decorator with the keyword arguments of the call -- confirm.
    _input_kwargs: Dict[str, Any]

    @keyword_only
    def __init__(
        self,
        *,
        rawPredictionCol: str = "rawPrediction",
        labelCol: str = "label",
        metricName: "BinaryClassificationEvaluatorMetricType" = "areaUnderROC",
        weightCol: Optional[str] = None,
        numBins: int = 1000,
    ):
        """
        __init__(self, \\*, rawPredictionCol="rawPrediction", labelCol="label", \
                 metricName="areaUnderROC", weightCol=None, numBins=1000)
        """
        super().__init__()
        self._java_obj = self._new_java_obj(
            "org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid
        )
        # Register defaults first, then apply whatever the caller passed.
        self._setDefault(metricName="areaUnderROC", numBins=1000)
        self._set(**self._input_kwargs)

    @since("1.4.0")
    def setMetricName(
        self, value: "BinaryClassificationEvaluatorMetricType"
    ) -> "BinaryClassificationEvaluator":
        """
        Sets the value of :py:attr:`metricName`.
        """
        return self._set(metricName=value)

    @since("1.4.0")
    def getMetricName(self) -> "BinaryClassificationEvaluatorMetricType":
        """
        Gets the value of metricName or its default value.
        """
        # Return annotation matches the Param declaration and setMetricName.
        return self.getOrDefault(self.metricName)

    @since("3.0.0")
    def setNumBins(self, value: int) -> "BinaryClassificationEvaluator":
        """
        Sets the value of :py:attr:`numBins`.
        """
        return self._set(numBins=value)

    @since("3.0.0")
    def getNumBins(self) -> int:
        """
        Gets the value of numBins or its default value.
        """
        return self.getOrDefault(self.numBins)

    def setLabelCol(self, value: str) -> "BinaryClassificationEvaluator":
        """
        Sets the value of :py:attr:`labelCol`.
        """
        return self._set(labelCol=value)

    def setRawPredictionCol(self, value: str) -> "BinaryClassificationEvaluator":
        """
        Sets the value of :py:attr:`rawPredictionCol`.
        """
        return self._set(rawPredictionCol=value)

    @since("3.0.0")
    def setWeightCol(self, value: str) -> "BinaryClassificationEvaluator":
        """
        Sets the value of :py:attr:`weightCol`.
        """
        return self._set(weightCol=value)

    @keyword_only
    @since("1.4.0")
    def setParams(
        self,
        *,
        rawPredictionCol: str = "rawPrediction",
        labelCol: str = "label",
        metricName: "BinaryClassificationEvaluatorMetricType" = "areaUnderROC",
        weightCol: Optional[str] = None,
        numBins: int = 1000,
    ) -> "BinaryClassificationEvaluator":
        """
        setParams(self, \\*, rawPredictionCol="rawPrediction", labelCol="label", \
                  metricName="areaUnderROC", weightCol=None, numBins=1000)
        Sets params for binary classification evaluator.
        """
        return self._set(**self._input_kwargs)
313
+
314
+
315
+ @inherit_doc
316
+ class RegressionEvaluator(
317
+ JavaEvaluator,
318
+ HasLabelCol,
319
+ HasPredictionCol,
320
+ HasWeightCol,
321
+ JavaMLReadable["RegressionEvaluator"],
322
+ JavaMLWritable,
323
+ ):
324
+ """
325
+ Evaluator for Regression, which expects input columns prediction, label
326
+ and an optional weight column.
327
+
328
+ .. versionadded:: 1.4.0
329
+
330
+ Examples
331
+ --------
332
+ >>> scoreAndLabels = [(-28.98343821, -27.0), (20.21491975, 21.5),
333
+ ... (-25.98418959, -22.0), (30.69731842, 33.0), (74.69283752, 71.0)]
334
+ >>> dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])
335
+ ...
336
+ >>> evaluator = RegressionEvaluator()
337
+ >>> evaluator.setPredictionCol("raw")
338
+ RegressionEvaluator...
339
+ >>> evaluator.evaluate(dataset)
340
+ 2.842...
341
+ >>> evaluator.evaluate(dataset, {evaluator.metricName: "r2"})
342
+ 0.993...
343
+ >>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
344
+ 2.649...
345
+ >>> re_path = temp_path + "/re"
346
+ >>> evaluator.save(re_path)
347
+ >>> evaluator2 = RegressionEvaluator.load(re_path)
348
+ >>> str(evaluator2.getPredictionCol())
349
+ 'raw'
350
+ >>> scoreAndLabelsAndWeight = [(-28.98343821, -27.0, 1.0), (20.21491975, 21.5, 0.8),
351
+ ... (-25.98418959, -22.0, 1.0), (30.69731842, 33.0, 0.6), (74.69283752, 71.0, 0.2)]
352
+ >>> dataset = spark.createDataFrame(scoreAndLabelsAndWeight, ["raw", "label", "weight"])
353
+ ...
354
+ >>> evaluator = RegressionEvaluator(predictionCol="raw", weightCol="weight")
355
+ >>> evaluator.evaluate(dataset)
356
+ 2.740...
357
+ >>> evaluator.getThroughOrigin()
358
+ False
359
+ """
360
+
361
+ metricName: Param["RegressionEvaluatorMetricType"] = Param(
362
+ Params._dummy(),
363
+ "metricName",
364
+ """metric name in evaluation - one of:
365
+ rmse - root mean squared error (default)
366
+ mse - mean squared error
367
+ r2 - r^2 metric
368
+ mae - mean absolute error
369
+ var - explained variance.""",
370
+ typeConverter=TypeConverters.toString, # type: ignore[arg-type]
371
+ )
372
+
373
+ throughOrigin: Param[bool] = Param(
374
+ Params._dummy(),
375
+ "throughOrigin",
376
+ "whether the regression is through the origin.",
377
+ typeConverter=TypeConverters.toBoolean,
378
+ )
379
+
380
+ _input_kwargs: Dict[str, Any]
381
+
382
+ @keyword_only
383
+ def __init__(
384
+ self,
385
+ *,
386
+ predictionCol: str = "prediction",
387
+ labelCol: str = "label",
388
+ metricName: "RegressionEvaluatorMetricType" = "rmse",
389
+ weightCol: Optional[str] = None,
390
+ throughOrigin: bool = False,
391
+ ):
392
+ """
393
+ __init__(self, \\*, predictionCol="prediction", labelCol="label", \
394
+ metricName="rmse", weightCol=None, throughOrigin=False)
395
+ """
396
+ super(RegressionEvaluator, self).__init__()
397
+ self._java_obj = self._new_java_obj(
398
+ "org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid
399
+ )
400
+ self._setDefault(metricName="rmse", throughOrigin=False)
401
+ kwargs = self._input_kwargs
402
+ self._set(**kwargs)
403
+
404
@since("1.4.0")
def setMetricName(self, value: "RegressionEvaluatorMetricType") -> "RegressionEvaluator":
    """
    Set :py:attr:`metricName` to ``value``.
    """
    return self._set(metricName=value)
410
+
411
@since("1.4.0")
def getMetricName(self) -> "RegressionEvaluatorMetricType":
    """
    Return the current value of metricName, falling back to its default.
    """
    return self.getOrDefault(self.metricName)
417
+
418
@since("3.0.0")
def setThroughOrigin(self, value: bool) -> "RegressionEvaluator":
    """
    Set :py:attr:`throughOrigin` to ``value``.
    """
    return self._set(throughOrigin=value)
424
+
425
@since("3.0.0")
def getThroughOrigin(self) -> bool:
    """
    Return the current value of throughOrigin, falling back to its default.
    """
    return self.getOrDefault(self.throughOrigin)
431
+
432
def setLabelCol(self, value: str) -> "RegressionEvaluator":
    """
    Set :py:attr:`labelCol` to ``value``.
    """
    return self._set(labelCol=value)
437
+
438
def setPredictionCol(self, value: str) -> "RegressionEvaluator":
    """
    Set :py:attr:`predictionCol` to ``value``.
    """
    return self._set(predictionCol=value)
443
+
444
@since("3.0.0")
def setWeightCol(self, value: str) -> "RegressionEvaluator":
    """
    Set :py:attr:`weightCol` to ``value``.
    """
    return self._set(weightCol=value)
450
+
451
@keyword_only
@since("1.4.0")
def setParams(
    self,
    *,
    predictionCol: str = "prediction",
    labelCol: str = "label",
    metricName: "RegressionEvaluatorMetricType" = "rmse",
    weightCol: Optional[str] = None,
    throughOrigin: bool = False,
) -> "RegressionEvaluator":
    """
    setParams(self, \\*, predictionCol="prediction", labelCol="label", \
              metricName="rmse", weightCol=None, throughOrigin=False)
    Sets params for regression evaluator.

    The values applied are the keyword arguments recorded in
    ``_input_kwargs``.
    """
    return self._set(**self._input_kwargs)
469
+
470
+
471
@inherit_doc
class MulticlassClassificationEvaluator(
    JavaEvaluator,
    HasLabelCol,
    HasPredictionCol,
    HasWeightCol,
    HasProbabilityCol,
    JavaMLReadable["MulticlassClassificationEvaluator"],
    JavaMLWritable,
):
    """
    Evaluator for Multiclass Classification. The expected input columns are
    prediction, label, weight (optional) and probabilityCol (only for logLoss).

    .. versionadded:: 1.5.0

    Examples
    --------
    >>> scoreAndLabels = [(0.0, 0.0), (0.0, 1.0), (0.0, 0.0),
    ...     (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]
    >>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
    >>> evaluator = MulticlassClassificationEvaluator()
    >>> evaluator.setPredictionCol("prediction")
    MulticlassClassificationEvaluator...
    >>> evaluator.evaluate(dataset)
    0.66...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
    0.66...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "truePositiveRateByLabel",
    ...     evaluator.metricLabel: 1.0})
    0.75...
    >>> evaluator.setMetricName("hammingLoss")
    MulticlassClassificationEvaluator...
    >>> evaluator.evaluate(dataset)
    0.33...
    >>> mce_path = temp_path + "/mce"
    >>> evaluator.save(mce_path)
    >>> evaluator2 = MulticlassClassificationEvaluator.load(mce_path)
    >>> str(evaluator2.getPredictionCol())
    'prediction'
    >>> scoreAndLabelsAndWeight = [(0.0, 0.0, 1.0), (0.0, 1.0, 1.0), (0.0, 0.0, 1.0),
    ...     (1.0, 0.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),
    ...     (2.0, 2.0, 1.0), (2.0, 0.0, 1.0)]
    >>> dataset = spark.createDataFrame(scoreAndLabelsAndWeight, ["prediction", "label", "weight"])
    >>> evaluator = MulticlassClassificationEvaluator(predictionCol="prediction",
    ...     weightCol="weight")
    >>> evaluator.evaluate(dataset)
    0.66...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
    0.66...
    >>> predictionAndLabelsWithProbabilities = [
    ...      (1.0, 1.0, 1.0, [0.1, 0.8, 0.1]), (0.0, 2.0, 1.0, [0.9, 0.05, 0.05]),
    ...      (0.0, 0.0, 1.0, [0.8, 0.2, 0.0]), (1.0, 1.0, 1.0, [0.3, 0.65, 0.05])]
    >>> dataset = spark.createDataFrame(predictionAndLabelsWithProbabilities, ["prediction",
    ...     "label", "weight", "probability"])
    >>> evaluator = MulticlassClassificationEvaluator(predictionCol="prediction",
    ...     probabilityCol="probability")
    >>> evaluator.setMetricName("logLoss")
    MulticlassClassificationEvaluator...
    >>> evaluator.evaluate(dataset)
    0.9682...
    """

    # Name of the metric reported by the evaluator.
    metricName: Param["MulticlassClassificationEvaluatorMetricType"] = Param(
        Params._dummy(),
        "metricName",
        "metric name in evaluation "
        "(f1|accuracy|weightedPrecision|weightedRecall|weightedTruePositiveRate| "
        "weightedFalsePositiveRate|weightedFMeasure|truePositiveRateByLabel| "
        "falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel| "
        "logLoss|hammingLoss)",
        typeConverter=TypeConverters.toString,  # type: ignore[arg-type]
    )
    # Class label targeted by the per-label metrics.
    metricLabel: Param[float] = Param(
        Params._dummy(),
        "metricLabel",
        "The class whose metric will be computed in truePositiveRateByLabel|"
        "falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel."
        " Must be >= 0. The default value is 0.",
        typeConverter=TypeConverters.toFloat,
    )
    # Beta used by the F-measure metrics.
    beta: Param[float] = Param(
        Params._dummy(),
        "beta",
        "The beta value used in weightedFMeasure|fMeasureByLabel."
        " Must be > 0. The default value is 1.",
        typeConverter=TypeConverters.toFloat,
    )
    # Clipping bound applied to probabilities for log-loss.
    eps: Param[float] = Param(
        Params._dummy(),
        "eps",
        "log-loss is undefined for p=0 or p=1, so probabilities are clipped to "
        "max(eps, min(1 - eps, p)). "
        "Must be in range (0, 0.5). The default value is 1e-15.",
        typeConverter=TypeConverters.toFloat,
    )

    _input_kwargs: Dict[str, Any]

    @keyword_only
    def __init__(
        self,
        *,
        predictionCol: str = "prediction",
        labelCol: str = "label",
        metricName: "MulticlassClassificationEvaluatorMetricType" = "f1",
        weightCol: Optional[str] = None,
        metricLabel: float = 0.0,
        beta: float = 1.0,
        probabilityCol: str = "probability",
        eps: float = 1e-15,
    ):
        """
        __init__(self, \\*, predictionCol="prediction", labelCol="label", \
                 metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0, \
                 probabilityCol="probability", eps=1e-15)

        Creates the backing Java evaluator object, registers the param
        defaults, and then applies the keyword arguments recorded in
        ``_input_kwargs``.
        """
        super().__init__()
        java_class = "org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator"
        self._java_obj = self._new_java_obj(java_class, self.uid)
        self._setDefault(metricName="f1", metricLabel=0.0, beta=1.0, eps=1e-15)
        self._set(**self._input_kwargs)

    @since("1.5.0")
    def setMetricName(
        self, value: "MulticlassClassificationEvaluatorMetricType"
    ) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`metricName` to ``value``.
        """
        return self._set(metricName=value)

    @since("1.5.0")
    def getMetricName(self) -> "MulticlassClassificationEvaluatorMetricType":
        """
        Return the current value of metricName, falling back to its default.
        """
        return self.getOrDefault(self.metricName)

    @since("3.0.0")
    def setMetricLabel(self, value: float) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`metricLabel` to ``value``.
        """
        return self._set(metricLabel=value)

    @since("3.0.0")
    def getMetricLabel(self) -> float:
        """
        Return the current value of metricLabel, falling back to its default.
        """
        return self.getOrDefault(self.metricLabel)

    @since("3.0.0")
    def setBeta(self, value: float) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`beta` to ``value``.
        """
        return self._set(beta=value)

    @since("3.0.0")
    def getBeta(self) -> float:
        """
        Return the current value of beta, falling back to its default.
        """
        return self.getOrDefault(self.beta)

    @since("3.0.0")
    def setEps(self, value: float) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`eps` to ``value``.
        """
        return self._set(eps=value)

    @since("3.0.0")
    def getEps(self) -> float:
        """
        Return the current value of eps, falling back to its default.
        """
        return self.getOrDefault(self.eps)

    def setLabelCol(self, value: str) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`labelCol` to ``value``.
        """
        return self._set(labelCol=value)

    def setPredictionCol(self, value: str) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`predictionCol` to ``value``.
        """
        return self._set(predictionCol=value)

    @since("3.0.0")
    def setProbabilityCol(self, value: str) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`probabilityCol` to ``value``.
        """
        return self._set(probabilityCol=value)

    @since("3.0.0")
    def setWeightCol(self, value: str) -> "MulticlassClassificationEvaluator":
        """
        Set :py:attr:`weightCol` to ``value``.
        """
        return self._set(weightCol=value)

    @keyword_only
    @since("1.5.0")
    def setParams(
        self,
        *,
        predictionCol: str = "prediction",
        labelCol: str = "label",
        metricName: "MulticlassClassificationEvaluatorMetricType" = "f1",
        weightCol: Optional[str] = None,
        metricLabel: float = 0.0,
        beta: float = 1.0,
        probabilityCol: str = "probability",
        eps: float = 1e-15,
    ) -> "MulticlassClassificationEvaluator":
        """
        setParams(self, \\*, predictionCol="prediction", labelCol="label", \
                  metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0, \
                  probabilityCol="probability", eps=1e-15)
        Sets params for multiclass classification evaluator.

        The values applied are the keyword arguments recorded in
        ``_input_kwargs``.
        """
        return self._set(**self._input_kwargs)
702
+
703
+
704
@inherit_doc
class MultilabelClassificationEvaluator(
    JavaEvaluator,
    HasLabelCol,
    HasPredictionCol,
    JavaMLReadable["MultilabelClassificationEvaluator"],
    JavaMLWritable,
):
    """
    Evaluator for Multilabel Classification. It expects two input
    columns: prediction and label.

    .. versionadded:: 3.0.0

    Notes
    -----
    Experimental

    Examples
    --------
    >>> scoreAndLabels = [([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]),
    ...     ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]),
    ...     ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])]
    >>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
    ...
    >>> evaluator = MultilabelClassificationEvaluator()
    >>> evaluator.setPredictionCol("prediction")
    MultilabelClassificationEvaluator...
    >>> evaluator.evaluate(dataset)
    0.63...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
    0.54...
    >>> mlce_path = temp_path + "/mlce"
    >>> evaluator.save(mlce_path)
    >>> evaluator2 = MultilabelClassificationEvaluator.load(mlce_path)
    >>> str(evaluator2.getPredictionCol())
    'prediction'
    """

    # Name of the metric reported by the evaluator.
    metricName: Param["MultilabelClassificationEvaluatorMetricType"] = Param(
        Params._dummy(),
        "metricName",
        "metric name in evaluation "
        "(subsetAccuracy|accuracy|hammingLoss|precision|recall|f1Measure|"
        "precisionByLabel|recallByLabel|f1MeasureByLabel|microPrecision|"
        "microRecall|microF1Measure)",
        typeConverter=TypeConverters.toString,  # type: ignore[arg-type]
    )
    # Class label targeted by the per-label metrics.
    metricLabel: Param[float] = Param(
        Params._dummy(),
        "metricLabel",
        "The class whose metric will be computed in precisionByLabel|"
        "recallByLabel|f1MeasureByLabel. "
        "Must be >= 0. The default value is 0.",
        typeConverter=TypeConverters.toFloat,
    )

    _input_kwargs: Dict[str, Any]

    @keyword_only
    def __init__(
        self,
        *,
        predictionCol: str = "prediction",
        labelCol: str = "label",
        metricName: "MultilabelClassificationEvaluatorMetricType" = "f1Measure",
        metricLabel: float = 0.0,
    ) -> None:
        """
        __init__(self, \\*, predictionCol="prediction", labelCol="label", \
                 metricName="f1Measure", metricLabel=0.0)

        Creates the backing Java evaluator object, registers the param
        defaults, and then applies the keyword arguments recorded in
        ``_input_kwargs``.
        """
        super().__init__()
        java_class = "org.apache.spark.ml.evaluation.MultilabelClassificationEvaluator"
        self._java_obj = self._new_java_obj(java_class, self.uid)
        self._setDefault(metricName="f1Measure", metricLabel=0.0)
        self._set(**self._input_kwargs)

    @since("3.0.0")
    def setMetricName(
        self, value: "MultilabelClassificationEvaluatorMetricType"
    ) -> "MultilabelClassificationEvaluator":
        """
        Set :py:attr:`metricName` to ``value``.
        """
        return self._set(metricName=value)

    @since("3.0.0")
    def getMetricName(self) -> "MultilabelClassificationEvaluatorMetricType":
        """
        Return the current value of metricName, falling back to its default.
        """
        return self.getOrDefault(self.metricName)

    @since("3.0.0")
    def setMetricLabel(self, value: float) -> "MultilabelClassificationEvaluator":
        """
        Set :py:attr:`metricLabel` to ``value``.
        """
        return self._set(metricLabel=value)

    @since("3.0.0")
    def getMetricLabel(self) -> float:
        """
        Return the current value of metricLabel, falling back to its default.
        """
        return self.getOrDefault(self.metricLabel)

    @since("3.0.0")
    def setLabelCol(self, value: str) -> "MultilabelClassificationEvaluator":
        """
        Set :py:attr:`labelCol` to ``value``.
        """
        return self._set(labelCol=value)

    @since("3.0.0")
    def setPredictionCol(self, value: str) -> "MultilabelClassificationEvaluator":
        """
        Set :py:attr:`predictionCol` to ``value``.
        """
        return self._set(predictionCol=value)

    @keyword_only
    @since("3.0.0")
    def setParams(
        self,
        *,
        predictionCol: str = "prediction",
        labelCol: str = "label",
        metricName: "MultilabelClassificationEvaluatorMetricType" = "f1Measure",
        metricLabel: float = 0.0,
    ) -> "MultilabelClassificationEvaluator":
        """
        setParams(self, \\*, predictionCol="prediction", labelCol="label", \
                  metricName="f1Measure", metricLabel=0.0)
        Sets params for multilabel classification evaluator.

        The values applied are the keyword arguments recorded in
        ``_input_kwargs``.
        """
        return self._set(**self._input_kwargs)
845
+
846
+
847
@inherit_doc
class ClusteringEvaluator(
    JavaEvaluator,
    HasPredictionCol,
    HasFeaturesCol,
    HasWeightCol,
    JavaMLReadable["ClusteringEvaluator"],
    JavaMLWritable,
):
    """
    Evaluator for Clustering results. It expects the input columns
    prediction and features, plus an optional weight column. The metric is
    the Silhouette measure; the distance it uses is selected through
    :py:attr:`distanceMeasure` and defaults to the squared Euclidean distance.

    The Silhouette is a measure for the validation of the consistency
    within clusters. It ranges between 1 and -1, where a value close to
    1 means that the points in a cluster are close to the other points
    in the same cluster and far from the points of the other clusters.

    .. versionadded:: 2.3.0

    Examples
    --------
    >>> from pyspark.ml.linalg import Vectors
    >>> featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),
    ...     [([0.0, 0.5], 0.0), ([0.5, 0.0], 0.0), ([10.0, 11.0], 1.0),
    ...     ([10.5, 11.5], 1.0), ([1.0, 1.0], 0.0), ([8.0, 6.0], 1.0)])
    >>> dataset = spark.createDataFrame(featureAndPredictions, ["features", "prediction"])
    ...
    >>> evaluator = ClusteringEvaluator()
    >>> evaluator.setPredictionCol("prediction")
    ClusteringEvaluator...
    >>> evaluator.evaluate(dataset)
    0.9079...
    >>> featureAndPredictionsWithWeight = map(lambda x: (Vectors.dense(x[0]), x[1], x[2]),
    ...     [([0.0, 0.5], 0.0, 2.5), ([0.5, 0.0], 0.0, 2.5), ([10.0, 11.0], 1.0, 2.5),
    ...     ([10.5, 11.5], 1.0, 2.5), ([1.0, 1.0], 0.0, 2.5), ([8.0, 6.0], 1.0, 2.5)])
    >>> dataset = spark.createDataFrame(
    ...     featureAndPredictionsWithWeight, ["features", "prediction", "weight"])
    >>> evaluator = ClusteringEvaluator()
    >>> evaluator.setPredictionCol("prediction")
    ClusteringEvaluator...
    >>> evaluator.setWeightCol("weight")
    ClusteringEvaluator...
    >>> evaluator.evaluate(dataset)
    0.9079...
    >>> ce_path = temp_path + "/ce"
    >>> evaluator.save(ce_path)
    >>> evaluator2 = ClusteringEvaluator.load(ce_path)
    >>> str(evaluator2.getPredictionCol())
    'prediction'
    """

    # Name of the metric reported by the evaluator.
    metricName: Param["ClusteringEvaluatorMetricType"] = Param(
        Params._dummy(),
        "metricName",
        "metric name in evaluation (silhouette)",
        typeConverter=TypeConverters.toString,  # type: ignore[arg-type]
    )
    # Distance measure param.
    distanceMeasure: Param["ClusteringEvaluatorDistanceMeasureType"] = Param(
        Params._dummy(),
        "distanceMeasure",
        "The distance measure. "
        "Supported options: 'squaredEuclidean' and 'cosine'.",
        typeConverter=TypeConverters.toString,  # type: ignore[arg-type]
    )

    _input_kwargs: Dict[str, Any]

    @keyword_only
    def __init__(
        self,
        *,
        predictionCol: str = "prediction",
        featuresCol: str = "features",
        metricName: "ClusteringEvaluatorMetricType" = "silhouette",
        distanceMeasure: str = "squaredEuclidean",
        weightCol: Optional[str] = None,
    ):
        """
        __init__(self, \\*, predictionCol="prediction", featuresCol="features", \
                 metricName="silhouette", distanceMeasure="squaredEuclidean", weightCol=None)

        Creates the backing Java evaluator object, registers the param
        defaults, and then applies the keyword arguments recorded in
        ``_input_kwargs``.
        """
        super().__init__()
        java_class = "org.apache.spark.ml.evaluation.ClusteringEvaluator"
        self._java_obj = self._new_java_obj(java_class, self.uid)
        self._setDefault(metricName="silhouette", distanceMeasure="squaredEuclidean")
        self._set(**self._input_kwargs)

    @keyword_only
    @since("2.3.0")
    def setParams(
        self,
        *,
        predictionCol: str = "prediction",
        featuresCol: str = "features",
        metricName: "ClusteringEvaluatorMetricType" = "silhouette",
        distanceMeasure: str = "squaredEuclidean",
        weightCol: Optional[str] = None,
    ) -> "ClusteringEvaluator":
        """
        setParams(self, \\*, predictionCol="prediction", featuresCol="features", \
                  metricName="silhouette", distanceMeasure="squaredEuclidean", weightCol=None)
        Sets params for clustering evaluator.

        The values applied are the keyword arguments recorded in
        ``_input_kwargs``.
        """
        return self._set(**self._input_kwargs)

    @since("2.3.0")
    def setMetricName(self, value: "ClusteringEvaluatorMetricType") -> "ClusteringEvaluator":
        """
        Set :py:attr:`metricName` to ``value``.
        """
        return self._set(metricName=value)

    @since("2.3.0")
    def getMetricName(self) -> "ClusteringEvaluatorMetricType":
        """
        Return the current value of metricName, falling back to its default.
        """
        return self.getOrDefault(self.metricName)

    @since("2.4.0")
    def setDistanceMeasure(
        self, value: "ClusteringEvaluatorDistanceMeasureType"
    ) -> "ClusteringEvaluator":
        """
        Set :py:attr:`distanceMeasure` to ``value``.
        """
        return self._set(distanceMeasure=value)

    @since("2.4.0")
    def getDistanceMeasure(self) -> "ClusteringEvaluatorDistanceMeasureType":
        """
        Return the current value of distanceMeasure, falling back to its default.
        """
        return self.getOrDefault(self.distanceMeasure)

    def setFeaturesCol(self, value: "str") -> "ClusteringEvaluator":
        """
        Set :py:attr:`featuresCol` to ``value``.
        """
        return self._set(featuresCol=value)

    def setPredictionCol(self, value: str) -> "ClusteringEvaluator":
        """
        Set :py:attr:`predictionCol` to ``value``.
        """
        return self._set(predictionCol=value)

    @since("3.1.0")
    def setWeightCol(self, value: str) -> "ClusteringEvaluator":
        """
        Set :py:attr:`weightCol` to ``value``.
        """
        return self._set(weightCol=value)
1004
+
1005
+
1006
@inherit_doc
class RankingEvaluator(
    JavaEvaluator,
    HasLabelCol,
    HasPredictionCol,
    JavaMLReadable["RankingEvaluator"],
    JavaMLWritable,
):
    """
    Evaluator for Ranking. It expects two input
    columns: prediction and label.

    .. versionadded:: 3.0.0

    Notes
    -----
    Experimental

    Examples
    --------
    >>> scoreAndLabels = [([1.0, 6.0, 2.0, 7.0, 8.0, 3.0, 9.0, 10.0, 4.0, 5.0],
    ...     [1.0, 2.0, 3.0, 4.0, 5.0]),
    ...     ([4.0, 1.0, 5.0, 6.0, 2.0, 7.0, 3.0, 8.0, 9.0, 10.0], [1.0, 2.0, 3.0]),
    ...     ([1.0, 2.0, 3.0, 4.0, 5.0], [])]
    >>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
    ...
    >>> evaluator = RankingEvaluator()
    >>> evaluator.setPredictionCol("prediction")
    RankingEvaluator...
    >>> evaluator.evaluate(dataset)
    0.35...
    >>> evaluator.evaluate(dataset, {evaluator.metricName: "precisionAtK", evaluator.k: 2})
    0.33...
    >>> ranke_path = temp_path + "/ranke"
    >>> evaluator.save(ranke_path)
    >>> evaluator2 = RankingEvaluator.load(ranke_path)
    >>> str(evaluator2.getPredictionCol())
    'prediction'
    """

    # Name of the metric reported by the evaluator.
    metricName: Param["RankingEvaluatorMetricType"] = Param(
        Params._dummy(),
        "metricName",
        "metric name in evaluation "
        "(meanAveragePrecision|meanAveragePrecisionAtK|"
        "precisionAtK|ndcgAtK|recallAtK)",
        typeConverter=TypeConverters.toString,  # type: ignore[arg-type]
    )
    # Ranking cut-off used by the "...AtK" metrics.
    k: Param[int] = Param(
        Params._dummy(),
        "k",
        "The ranking position value used in meanAveragePrecisionAtK|precisionAtK|"
        "ndcgAtK|recallAtK. Must be > 0. The default value is 10.",
        typeConverter=TypeConverters.toInt,
    )

    _input_kwargs: Dict[str, Any]

    @keyword_only
    def __init__(
        self,
        *,
        predictionCol: str = "prediction",
        labelCol: str = "label",
        metricName: "RankingEvaluatorMetricType" = "meanAveragePrecision",
        k: int = 10,
    ):
        """
        __init__(self, \\*, predictionCol="prediction", labelCol="label", \
                 metricName="meanAveragePrecision", k=10)

        Creates the backing Java evaluator object, registers the param
        defaults, and then applies the keyword arguments recorded in
        ``_input_kwargs``.
        """
        super().__init__()
        java_class = "org.apache.spark.ml.evaluation.RankingEvaluator"
        self._java_obj = self._new_java_obj(java_class, self.uid)
        self._setDefault(metricName="meanAveragePrecision", k=10)
        self._set(**self._input_kwargs)

    @since("3.0.0")
    def setMetricName(self, value: "RankingEvaluatorMetricType") -> "RankingEvaluator":
        """
        Set :py:attr:`metricName` to ``value``.
        """
        return self._set(metricName=value)

    @since("3.0.0")
    def getMetricName(self) -> "RankingEvaluatorMetricType":
        """
        Return the current value of metricName, falling back to its default.
        """
        return self.getOrDefault(self.metricName)

    @since("3.0.0")
    def setK(self, value: int) -> "RankingEvaluator":
        """
        Set :py:attr:`k` to ``value``.
        """
        return self._set(k=value)

    @since("3.0.0")
    def getK(self) -> int:
        """
        Return the current value of k, falling back to its default.
        """
        return self.getOrDefault(self.k)

    @since("3.0.0")
    def setLabelCol(self, value: str) -> "RankingEvaluator":
        """
        Set :py:attr:`labelCol` to ``value``.
        """
        return self._set(labelCol=value)

    @since("3.0.0")
    def setPredictionCol(self, value: str) -> "RankingEvaluator":
        """
        Set :py:attr:`predictionCol` to ``value``.
        """
        return self._set(predictionCol=value)

    @keyword_only
    @since("3.0.0")
    def setParams(
        self,
        *,
        predictionCol: str = "prediction",
        labelCol: str = "label",
        metricName: "RankingEvaluatorMetricType" = "meanAveragePrecision",
        k: int = 10,
    ) -> "RankingEvaluator":
        """
        setParams(self, \\*, predictionCol="prediction", labelCol="label", \
                  metricName="meanAveragePrecision", k=10)
        Sets params for ranking evaluator.

        The values applied are the keyword arguments recorded in
        ``_input_kwargs``.
        """
        return self._set(**self._input_kwargs)
1140
+
1141
+
1142
if __name__ == "__main__":
    # Script entry point: run this module's doctests against a local Spark
    # session and exit with a non-zero status if any of them fail.
    import doctest
    import sys  # local import so the exit call below never depends on a module-level one
    import tempfile
    from shutil import rmtree

    import pyspark.ml.evaluation
    from pyspark.sql import SparkSession

    # The doctests run in a copy of the module namespace, extended with the
    # `spark` session and the `temp_path` scratch directory they reference.
    globs = pyspark.ml.evaluation.__dict__.copy()
    spark = SparkSession.builder.master("local[2]").appName("ml.evaluation tests").getOrCreate()
    globs["spark"] = spark
    temp_path = tempfile.mkdtemp()
    globs["temp_path"] = temp_path
    try:
        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
    finally:
        # Release the session and the scratch directory even when testmod
        # itself raises; previously the session was only stopped on success.
        try:
            spark.stop()
        finally:
            try:
                rmtree(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp dir must not mask the
                # doctest outcome.
                pass
    if failure_count:
        sys.exit(-1)