snowpark-connect 0.20.2__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect has been flagged as potentially problematic; see the release's detail page for more information.
- snowflake/snowpark_connect/__init__.py +23 -0
- snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
- snowflake/snowpark_connect/column_name_handler.py +735 -0
- snowflake/snowpark_connect/config.py +576 -0
- snowflake/snowpark_connect/constants.py +47 -0
- snowflake/snowpark_connect/control_server.py +52 -0
- snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
- snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
- snowflake/snowpark_connect/empty_dataframe.py +18 -0
- snowflake/snowpark_connect/error/__init__.py +11 -0
- snowflake/snowpark_connect/error/error_mapping.py +6174 -0
- snowflake/snowpark_connect/error/error_utils.py +321 -0
- snowflake/snowpark_connect/error/exceptions.py +24 -0
- snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
- snowflake/snowpark_connect/execute_plan/utils.py +183 -0
- snowflake/snowpark_connect/expression/__init__.py +3 -0
- snowflake/snowpark_connect/expression/literal.py +90 -0
- snowflake/snowpark_connect/expression/map_cast.py +343 -0
- snowflake/snowpark_connect/expression/map_expression.py +293 -0
- snowflake/snowpark_connect/expression/map_extension.py +104 -0
- snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
- snowflake/snowpark_connect/expression/map_udf.py +142 -0
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
- snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
- snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
- snowflake/snowpark_connect/expression/map_window_function.py +258 -0
- snowflake/snowpark_connect/expression/typer.py +125 -0
- snowflake/snowpark_connect/includes/__init__.py +0 -0
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
- snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
- snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
- snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
- snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
- snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
- snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
- snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
- snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
- snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
- snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
- snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
- snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
- snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
- snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
- snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
- snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
- snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
- snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
- snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
- snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
- snowflake/snowpark_connect/proto/__init__.py +10 -0
- snowflake/snowpark_connect/proto/control_pb2.py +35 -0
- snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
- snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
- snowflake/snowpark_connect/relation/__init__.py +3 -0
- snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
- snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
- snowflake/snowpark_connect/relation/io_utils.py +76 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
- snowflake/snowpark_connect/relation/map_catalog.py +151 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
- snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
- snowflake/snowpark_connect/relation/map_extension.py +412 -0
- snowflake/snowpark_connect/relation/map_join.py +341 -0
- snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
- snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
- snowflake/snowpark_connect/relation/map_relation.py +253 -0
- snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
- snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
- snowflake/snowpark_connect/relation/map_show_string.py +50 -0
- snowflake/snowpark_connect/relation/map_sql.py +1874 -0
- snowflake/snowpark_connect/relation/map_stats.py +324 -0
- snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
- snowflake/snowpark_connect/relation/map_udtf.py +288 -0
- snowflake/snowpark_connect/relation/read/__init__.py +7 -0
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
- snowflake/snowpark_connect/relation/read/map_read.py +367 -0
- snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
- snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
- snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
- snowflake/snowpark_connect/relation/read/utils.py +155 -0
- snowflake/snowpark_connect/relation/stage_locator.py +161 -0
- snowflake/snowpark_connect/relation/utils.py +219 -0
- snowflake/snowpark_connect/relation/write/__init__.py +3 -0
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
- snowflake/snowpark_connect/relation/write/map_write.py +436 -0
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +75 -0
- snowflake/snowpark_connect/server.py +1136 -0
- snowflake/snowpark_connect/start_server.py +32 -0
- snowflake/snowpark_connect/tcm.py +8 -0
- snowflake/snowpark_connect/type_mapping.py +1003 -0
- snowflake/snowpark_connect/typed_column.py +94 -0
- snowflake/snowpark_connect/utils/__init__.py +3 -0
- snowflake/snowpark_connect/utils/artifacts.py +48 -0
- snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
- snowflake/snowpark_connect/utils/cache.py +84 -0
- snowflake/snowpark_connect/utils/concurrent.py +124 -0
- snowflake/snowpark_connect/utils/context.py +390 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
- snowflake/snowpark_connect/utils/interrupt.py +85 -0
- snowflake/snowpark_connect/utils/io_utils.py +35 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
- snowflake/snowpark_connect/utils/profiling.py +47 -0
- snowflake/snowpark_connect/utils/session.py +180 -0
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
- snowflake/snowpark_connect/utils/telemetry.py +513 -0
- snowflake/snowpark_connect/utils/udf_cache.py +392 -0
- snowflake/snowpark_connect/utils/udf_helper.py +328 -0
- snowflake/snowpark_connect/utils/udf_utils.py +310 -0
- snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
- snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
- snowflake/snowpark_connect/utils/xxhash64.py +247 -0
- snowflake/snowpark_connect/version.py +6 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
- snowpark_connect-0.20.2.dist-info/METADATA +37 -0
- snowpark_connect-0.20.2.dist-info/RECORD +879 -0
- snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
- snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
- snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1689 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
3
|
+
# contributor license agreements. See the NOTICE file distributed with
|
|
4
|
+
# this work for additional information regarding copyright ownership.
|
|
5
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
6
|
+
# (the "License"); you may not use this file except in compliance with
|
|
7
|
+
# the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
#
|
|
17
|
+
__all__ = [
|
|
18
|
+
"ChannelBuilder",
|
|
19
|
+
"SparkConnectClient",
|
|
20
|
+
"getLogLevel",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
from pyspark.sql.connect.utils import check_dependencies
|
|
24
|
+
|
|
25
|
+
check_dependencies(__name__)
|
|
26
|
+
|
|
27
|
+
import threading
|
|
28
|
+
import logging
|
|
29
|
+
import os
|
|
30
|
+
import platform
|
|
31
|
+
import random
|
|
32
|
+
import time
|
|
33
|
+
import urllib.parse
|
|
34
|
+
import uuid
|
|
35
|
+
import sys
|
|
36
|
+
from types import TracebackType
|
|
37
|
+
from typing import (
|
|
38
|
+
Iterable,
|
|
39
|
+
Iterator,
|
|
40
|
+
Optional,
|
|
41
|
+
Any,
|
|
42
|
+
Union,
|
|
43
|
+
List,
|
|
44
|
+
Tuple,
|
|
45
|
+
Dict,
|
|
46
|
+
Set,
|
|
47
|
+
NoReturn,
|
|
48
|
+
cast,
|
|
49
|
+
Callable,
|
|
50
|
+
Generator,
|
|
51
|
+
Type,
|
|
52
|
+
TYPE_CHECKING,
|
|
53
|
+
Sequence,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
import pandas as pd
|
|
57
|
+
import pyarrow as pa
|
|
58
|
+
|
|
59
|
+
import google.protobuf.message
|
|
60
|
+
from grpc_status import rpc_status
|
|
61
|
+
import grpc
|
|
62
|
+
from google.protobuf import text_format
|
|
63
|
+
from google.rpc import error_details_pb2
|
|
64
|
+
|
|
65
|
+
from pyspark.version import __version__
|
|
66
|
+
from pyspark.resource.information import ResourceInformation
|
|
67
|
+
from pyspark.sql.connect.client.artifact import ArtifactManager
|
|
68
|
+
from pyspark.sql.connect.client.reattach import (
|
|
69
|
+
ExecutePlanResponseReattachableIterator,
|
|
70
|
+
RetryException,
|
|
71
|
+
)
|
|
72
|
+
from pyspark.sql.connect.conversion import storage_level_to_proto, proto_to_storage_level
|
|
73
|
+
import pyspark.sql.connect.proto as pb2
|
|
74
|
+
import pyspark.sql.connect.proto.base_pb2_grpc as grpc_lib
|
|
75
|
+
import pyspark.sql.connect.types as types
|
|
76
|
+
from pyspark.errors.exceptions.connect import (
|
|
77
|
+
convert_exception,
|
|
78
|
+
SparkConnectException,
|
|
79
|
+
SparkConnectGrpcException,
|
|
80
|
+
)
|
|
81
|
+
from pyspark.sql.connect.expressions import (
|
|
82
|
+
PythonUDF,
|
|
83
|
+
CommonInlineUserDefinedFunction,
|
|
84
|
+
JavaUDF,
|
|
85
|
+
)
|
|
86
|
+
from pyspark.sql.connect.plan import (
|
|
87
|
+
CommonInlineUserDefinedTableFunction,
|
|
88
|
+
PythonUDTF,
|
|
89
|
+
)
|
|
90
|
+
from pyspark.sql.connect.utils import get_python_ver
|
|
91
|
+
from pyspark.sql.pandas.types import _create_converter_to_pandas, from_arrow_schema
|
|
92
|
+
from pyspark.sql.types import DataType, StructType, TimestampType, _has_type
|
|
93
|
+
from pyspark.rdd import PythonEvalType
|
|
94
|
+
from pyspark.storagelevel import StorageLevel
|
|
95
|
+
from pyspark.errors import PySparkValueError
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
if TYPE_CHECKING:
|
|
99
|
+
from pyspark.sql.connect._typing import DataTypeOrString
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _configure_logging() -> logging.Logger:
|
|
103
|
+
"""Configure logging for the Spark Connect clients."""
|
|
104
|
+
logger = logging.getLogger(__name__)
|
|
105
|
+
handler = logging.StreamHandler()
|
|
106
|
+
handler.setFormatter(
|
|
107
|
+
logging.Formatter(fmt="%(asctime)s %(process)d %(levelname)s %(funcName)s %(message)s")
|
|
108
|
+
)
|
|
109
|
+
logger.addHandler(handler)
|
|
110
|
+
|
|
111
|
+
# Check the environment variables for log levels:
|
|
112
|
+
if "SPARK_CONNECT_LOG_LEVEL" in os.environ:
|
|
113
|
+
logger.setLevel(os.environ["SPARK_CONNECT_LOG_LEVEL"].upper())
|
|
114
|
+
else:
|
|
115
|
+
logger.disabled = True
|
|
116
|
+
return logger
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Module-level logger shared by this file, wired up once at import time
# from the environment configuration.
logger = _configure_logging()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def getLogLevel() -> Optional[int]:
    """
    Return the current log level as an integer, or ``None`` when no logging
    is enabled.

    Spark Connect logging can be configured with the environment variable
    'SPARK_CONNECT_LOG_LEVEL'.

    .. versionadded:: 3.5.0
    """
    return None if logger.disabled else logger.level
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class ChannelBuilder:
    """
    This is a helper class that is used to create a GRPC channel based on the given
    connection string per the documentation of Spark Connect.

    .. versionadded:: 3.4.0

    Examples
    --------
    >>> cb = ChannelBuilder("sc://localhost")
    ... cb.endpoint
    "localhost:15002"

    >>> cb = ChannelBuilder("sc://localhost/;use_ssl=true;token=aaa")
    ... cb.secure
    True
    """

    # Reserved connection-string parameter names. These are consumed by the
    # channel itself and are never forwarded as GRPC request metadata.
    PARAM_USE_SSL = "use_ssl"
    PARAM_TOKEN = "token"
    PARAM_USER_ID = "user_id"
    PARAM_USER_AGENT = "user_agent"
    PARAM_SESSION_ID = "session_id"
    # Maximum GRPC message size (128 MiB) applied to both directions.
    MAX_MESSAGE_LENGTH = 128 * 1024 * 1024

    @staticmethod
    def default_port() -> int:
        """Return the Spark Connect server port to use when the URL gives none."""
        if "SPARK_TESTING" in os.environ:
            from pyspark.sql.session import SparkSession as PySparkSession

            # In the case when Spark Connect uses the local mode, it starts the regular Spark
            # session that starts Spark Connect server that sets `SparkSession._instantiatedSession`
            # via SparkSession.__init__.
            #
            # We are getting the actual server port from the Spark session via Py4J to address
            # the case when the server port is set to 0 (in which allocates an ephemeral port).
            #
            # This is only used in the test/development mode.
            session = PySparkSession._instantiatedSession

            # 'spark.local.connect' is set when we use the local mode in Spark Connect.
            if session is not None and session.conf.get("spark.local.connect", "0") == "1":
                jvm = PySparkSession._instantiatedSession._jvm  # type: ignore[union-attr]
                return getattr(
                    getattr(
                        jvm.org.apache.spark.sql.connect.service,  # type: ignore[union-attr]
                        "SparkConnectService$",
                    ),
                    "MODULE$",
                ).localPort()
        return 15002

    def __init__(self, url: str, channelOptions: Optional[List[Tuple[str, Any]]] = None) -> None:
        """
        Constructs a new channel builder. This is used to create the proper GRPC channel from
        the connection string.

        Parameters
        ----------
        url : str
            Spark Connect connection string
        channelOptions: list of tuple, optional
            Additional options that can be passed to the GRPC channel construction.

        Raises
        ------
        PySparkValueError
            If the URL does not start with 'sc://', carries a non-empty path
            component, or contains malformed parameters.
        """
        # Explicitly check the scheme of the URL.
        if url[:5] != "sc://":
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": "The URL must start with 'sc://'. Please update the URL to "
                    "follow the correct format, e.g., 'sc://hostname:port'.",
                },
            )
        # Rewrite the URL to use http as the scheme so that we can leverage
        # Python's built-in parser.
        tmp_url = "http" + url[2:]
        self.url = urllib.parse.urlparse(tmp_url)
        self.params: Dict[str, str] = {}
        if len(self.url.path) > 0 and self.url.path != "/":
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"The path component '{self.url.path}' must be empty. Please update "
                    "the URL to follow the correct format, e.g., 'sc://hostname:port'.",
                },
            )
        self._extract_attributes()

        GRPC_DEFAULT_OPTIONS = [
            ("grpc.max_send_message_length", ChannelBuilder.MAX_MESSAGE_LENGTH),
            ("grpc.max_receive_message_length", ChannelBuilder.MAX_MESSAGE_LENGTH),
        ]

        if channelOptions is None:
            self._channel_options = GRPC_DEFAULT_OPTIONS
        else:
            self._channel_options = GRPC_DEFAULT_OPTIONS + channelOptions

    def _extract_attributes(self) -> None:
        """Parse the ';'-separated key=value parameters and the host:port target.

        Populates ``self.params``, ``self.host``, and ``self.port``.
        NOTE: parameter values containing a bare '=' are rejected; such values
        must be percent-encoded in the connection string.
        """
        if len(self.url.params) > 0:
            parts = self.url.params.split(";")
            for p in parts:
                kv = p.split("=")
                if len(kv) != 2:
                    raise PySparkValueError(
                        error_class="INVALID_CONNECT_URL",
                        message_parameters={
                            "detail": f"Parameter '{p}' should be provided as a "
                            "key-value pair separated by an equal sign (=). Please update "
                            "the parameter to follow the correct format, e.g., 'key=value'.",
                        },
                    )
                self.params[kv[0]] = urllib.parse.unquote(kv[1])

        netloc = self.url.netloc.split(":")
        if len(netloc) == 1:
            self.host = netloc[0]
            self.port = ChannelBuilder.default_port()
        elif len(netloc) == 2:
            self.host = netloc[0]
            self.port = int(netloc[1])
        else:
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"Target destination '{self.url.netloc}' should match the "
                    "'<host>:<port>' pattern. Please update the destination to follow "
                    "the correct format, e.g., 'hostname:port'.",
                },
            )

    def metadata(self) -> Iterable[Tuple[str, str]]:
        """
        Builds the GRPC specific metadata list to be injected into the request. All
        parameters will be converted to metadata except ones that are explicitly used
        by the channel.

        Returns
        -------
        A list of tuples (key, value)
        """
        return [
            (k, self.params[k])
            for k in self.params
            if k
            not in [
                ChannelBuilder.PARAM_TOKEN,
                ChannelBuilder.PARAM_USE_SSL,
                ChannelBuilder.PARAM_USER_ID,
                ChannelBuilder.PARAM_USER_AGENT,
                ChannelBuilder.PARAM_SESSION_ID,
            ]
        ]

    @property
    def secure(self) -> bool:
        """True when TLS must be used: either a token is set or use_ssl=true."""
        if self._token is not None:
            return True

        value = self.params.get(ChannelBuilder.PARAM_USE_SSL, "")
        return value.lower() == "true"

    @property
    def endpoint(self) -> str:
        """The target destination in '<host>:<port>' form."""
        return f"{self.host}:{self.port}"

    @property
    def _token(self) -> Optional[str]:
        """The bearer token from the connection string, or None."""
        return self.params.get(ChannelBuilder.PARAM_TOKEN, None)

    @property
    def userId(self) -> Optional[str]:
        """
        Returns
        -------
        The user_id extracted from the parameters of the connection string or `None` if not
        specified.
        """
        return self.params.get(ChannelBuilder.PARAM_USER_ID, None)

    @property
    def userAgent(self) -> str:
        """
        Returns
        -------
        user_agent : str
            The user_agent parameter specified in the connection string,
            or "_SPARK_CONNECT_PYTHON" when not specified.
            The returned value will be percent encoded.

        Raises
        ------
        SparkConnectException
            If the percent-encoded user agent exceeds 2048 characters.
        """
        user_agent = self.params.get(
            ChannelBuilder.PARAM_USER_AGENT,
            os.getenv("SPARK_CONNECT_USER_AGENT", "_SPARK_CONNECT_PYTHON"),
        )
        ua_len = len(urllib.parse.quote(user_agent))
        if ua_len > 2048:
            # BUGFIX: interpolate the measured length (ua_len), not the builtin
            # `len`, which previously rendered as "<built-in function len>".
            raise SparkConnectException(
                "'user_agent' parameter should not exceed 2048 characters, "
                f"found {ua_len} characters."
            )
        return " ".join(
            [
                user_agent,
                f"spark/{__version__}",
                f"os/{platform.uname().system.lower()}",
                f"python/{platform.python_version()}",
            ]
        )

    def get(self, key: str) -> Any:
        """
        Parameters
        ----------
        key : str
            Parameter key name.

        Returns
        -------
        The parameter value if present, raises exception otherwise.
        """
        return self.params[key]

    @property
    def session_id(self) -> Optional[str]:
        """
        Returns
        -------
        The session_id extracted from the parameters of the connection string or `None` if not
        specified.

        Raises
        ------
        ValueError
            If the provided session_id is not a valid version-4 UUID.
        """
        session_id = self.params.get(ChannelBuilder.PARAM_SESSION_ID, None)
        if session_id is not None:
            try:
                uuid.UUID(session_id, version=4)
            except ValueError as ve:
                raise ValueError("Parameter value 'session_id' must be a valid UUID format.", ve)
        return session_id

    def toChannel(self) -> "grpc.Channel":
        """
        Applies the parameters of the connection string and creates a new
        GRPC channel according to the configuration. Passes optional channel options to
        construct the channel.

        Returns
        -------
        GRPC Channel instance.
        """
        destination = f"{self.host}:{self.port}"

        # Setting a token implicitly sets `use_ssl` to True (the `secure`
        # property already accounts for the token, so this is one expression).
        use_secure = self.secure or self._token is not None

        if not use_secure:
            return grpc.insecure_channel(destination, options=self._channel_options)

        opt_token = self.params.get(ChannelBuilder.PARAM_TOKEN, None)
        if opt_token is not None:
            # When a token is present, attach it as per-call credentials on
            # top of the default SSL credentials.
            ssl_creds = grpc.ssl_channel_credentials()
            composite_creds = grpc.composite_channel_credentials(
                ssl_creds, grpc.access_token_call_credentials(opt_token)
            )
            return grpc.secure_channel(
                destination, credentials=composite_creds, options=self._channel_options
            )
        return grpc.secure_channel(
            destination,
            credentials=grpc.ssl_channel_credentials(),
            options=self._channel_options,
        )
|
|
416
|
+
|
|
417
|
+
class MetricValue:
    """A single named metric reported for a plan node, with its value and type tag."""

    def __init__(self, name: str, value: Union[int, float], type: str):
        self._value = value
        self._name = name
        self._type = type

    def __repr__(self) -> str:
        return "<{}={} ({})>".format(self._name, self._value, self._type)

    @property
    def name(self) -> str:
        """The metric's name."""
        return self._name

    @property
    def value(self) -> Union[int, float]:
        """The recorded metric value."""
        return self._value

    @property
    def metric_type(self) -> str:
        """The metric's type tag."""
        return self._type
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
class PlanMetrics:
    """Metrics collected for one node of an executed plan, keyed by plan id."""

    def __init__(self, name: str, id: int, parent: int, metrics: List["MetricValue"]):
        self._metrics = metrics
        self._name = name
        self._id = id
        self._parent_id = parent

    def __repr__(self) -> str:
        return "Plan({})={}".format(self._name, self._metrics)

    @property
    def name(self) -> str:
        """Name of the plan node these metrics belong to."""
        return self._name

    @property
    def plan_id(self) -> int:
        """Identifier of this plan node."""
        return self._id

    @property
    def parent_plan_id(self) -> int:
        """Identifier of this node's parent plan node."""
        return self._parent_id

    @property
    def metrics(self) -> List["MetricValue"]:
        """The metric values recorded for this node."""
        return self._metrics
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
class PlanObservedMetrics:
    """Observed (user-defined) metrics reported for a plan, as literal values."""

    def __init__(self, name: str, metrics: List["pb2.Expression.Literal"]):
        self._metrics = metrics
        self._name = name

    def __repr__(self) -> str:
        return "Plan observed({}={})".format(self._name, self._metrics)

    @property
    def name(self) -> str:
        """Name under which the metrics were observed."""
        return self._name

    @property
    def metrics(self) -> List["pb2.Expression.Literal"]:
        """The observed literal metric values."""
        return self._metrics
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
class AnalyzeResult:
    """Container for the single result carried by an AnalyzePlanResponse.

    Exactly one of the optional attributes is populated per response; all
    others remain ``None``.
    """

    def __init__(
        self,
        schema: Optional[DataType],
        explain_string: Optional[str],
        tree_string: Optional[str],
        is_local: Optional[bool],
        is_streaming: Optional[bool],
        input_files: Optional[List[str]],
        spark_version: Optional[str],
        parsed: Optional[DataType],
        is_same_semantics: Optional[bool],
        semantic_hash: Optional[int],
        storage_level: Optional[StorageLevel],
    ):
        self.schema = schema
        self.explain_string = explain_string
        self.tree_string = tree_string
        self.is_local = is_local
        self.is_streaming = is_streaming
        self.input_files = input_files
        self.spark_version = spark_version
        self.parsed = parsed
        self.is_same_semantics = is_same_semantics
        self.semantic_hash = semantic_hash
        self.storage_level = storage_level

    @classmethod
    def fromProto(cls, pb: Any) -> "AnalyzeResult":
        """Build an AnalyzeResult from an AnalyzePlanResponse proto.

        Checks each oneof result field in turn and extracts the one that is
        set; raises if the response carries no recognized result.
        """
        schema: Optional[DataType] = None
        explain_string: Optional[str] = None
        tree_string: Optional[str] = None
        is_local: Optional[bool] = None
        is_streaming: Optional[bool] = None
        input_files: Optional[List[str]] = None
        spark_version: Optional[str] = None
        parsed: Optional[DataType] = None
        is_same_semantics: Optional[bool] = None
        semantic_hash: Optional[int] = None
        storage_level: Optional[StorageLevel] = None

        # The branches are mutually exclusive: the response is a proto oneof,
        # so at most one HasField() check succeeds.
        if pb.HasField("schema"):
            schema = types.proto_schema_to_pyspark_data_type(pb.schema.schema)
        elif pb.HasField("explain"):
            explain_string = pb.explain.explain_string
        elif pb.HasField("tree_string"):
            tree_string = pb.tree_string.tree_string
        elif pb.HasField("is_local"):
            is_local = pb.is_local.is_local
        elif pb.HasField("is_streaming"):
            is_streaming = pb.is_streaming.is_streaming
        elif pb.HasField("input_files"):
            input_files = pb.input_files.files
        elif pb.HasField("spark_version"):
            spark_version = pb.spark_version.version
        elif pb.HasField("ddl_parse"):
            parsed = types.proto_schema_to_pyspark_data_type(pb.ddl_parse.parsed)
        elif pb.HasField("same_semantics"):
            is_same_semantics = pb.same_semantics.result
        elif pb.HasField("semantic_hash"):
            semantic_hash = pb.semantic_hash.result
        elif pb.HasField("persist"):
            # Persist/unpersist acknowledge success but carry no payload.
            pass
        elif pb.HasField("unpersist"):
            pass
        elif pb.HasField("get_storage_level"):
            storage_level = proto_to_storage_level(pb.get_storage_level.storage_level)
        else:
            raise SparkConnectException("No analyze result found!")

        return AnalyzeResult(
            schema,
            explain_string,
            tree_string,
            is_local,
            is_streaming,
            input_files,
            spark_version,
            parsed,
            is_same_semantics,
            semantic_hash,
            storage_level,
        )
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
class ConfigResult:
    """Result of a config RPC: resolved (key, value) pairs plus warnings."""

    def __init__(self, pairs: List[Tuple[str, Optional[str]]], warnings: List[str]):
        self.pairs = pairs
        self.warnings = warnings

    @classmethod
    def fromProto(cls, pb: pb2.ConfigResponse) -> "ConfigResult":
        """Convert a ConfigResponse proto into a ConfigResult."""
        # An unset value field maps to None (an unset/defaulted config key).
        resolved: List[Tuple[str, Optional[str]]] = []
        for pair in pb.pairs:
            resolved.append((pair.key, pair.value if pair.HasField("value") else None))
        return ConfigResult(
            pairs=resolved,
            warnings=list(pb.warnings),
        )
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
class SparkConnectClient(object):
|
|
582
|
+
"""
|
|
583
|
+
Conceptually the remote spark session that communicates with the server
|
|
584
|
+
"""
|
|
585
|
+
|
|
586
|
+
@classmethod
def retry_exception(cls, e: Exception) -> bool:
    """
    Helper function that is used to identify if an exception thrown by the server
    can be retried or not.

    Parameters
    ----------
    e : Exception
        The GRPC error as received from the server. Typed as Exception, because other exception
        thrown during client processing can be passed here as well.

    Returns
    -------
    True if the exception can be retried, False otherwise.

    """
    # Only GRPC-level errors are ever retryable.
    if not isinstance(e, grpc.RpcError):
        return False

    code = e.code()

    # INTERNAL is retried only when another RPC preempted this one
    # (the server reports it as a disconnected cursor).
    if code == grpc.StatusCode.INTERNAL and "INVALID_CURSOR.DISCONNECTED" in str(e):
        return True

    # Temporary unavailability of the service is always retryable.
    return code == grpc.StatusCode.UNAVAILABLE
|
|
617
|
+
|
|
618
|
+
def __init__(
    self,
    connection: Union[str, ChannelBuilder],
    user_id: Optional[str] = None,
    channel_options: Optional[List[Tuple[str, Any]]] = None,
    retry_policy: Optional[Dict[str, Any]] = None,
    use_reattachable_execute: bool = True,
):
    """
    Creates a new SparkSession for the Spark Connect interface.

    Parameters
    ----------
    connection : str or :class:`ChannelBuilder`
        Connection string that is used to extract the connection parameters and configure
        the GRPC connection. Or instance of ChannelBuilder that creates GRPC connection.
        Defaults to `sc://localhost`.
    user_id : str, optional
        Optional unique user ID that is used to differentiate multiple users and
        isolate their Spark Sessions. If the `user_id` is not set, will default to
        the $USER environment. Defining the user ID as part of the connection string
        takes precedence.
    channel_options: list of tuple, optional
        Additional options that can be passed to the GRPC channel construction.
    retry_policy: dict of str and any, optional
        Additional configuration for retrying. There are four configurations as below
            * ``max_retries``
                Maximum number of tries default 15
            * ``backoff_multiplier``
                Backoff multiplier for the policy. Default: 4(ms)
            * ``initial_backoff``
                Backoff to wait before the first retry. Default: 50(ms)
            * ``max_backoff``
                Maximum backoff controls the maximum amount of time to wait before retrying
                a failed request. Default: 60000(ms).
    use_reattachable_execute: bool
        Enable reattachable execution.
    """
    # Per-thread storage (e.g. for operation tags set on this client).
    self.thread_local = threading.local()

    # Parse the connection string.
    self._builder = (
        connection
        if isinstance(connection, ChannelBuilder)
        else ChannelBuilder(connection, channel_options)
    )
    self._user_id = None
    self._retry_policy = {
        # Please synchronize changes here with Scala side
        # GrpcRetryHandler.scala
        #
        # Note: the number of retries is selected so that the maximum tolerated wait
        # is guaranteed to be at least 10 minutes
        "max_retries": 15,
        "backoff_multiplier": 4.0,
        "initial_backoff": 50,
        "max_backoff": 60000,
        "jitter": 500,
        "min_jitter_threshold": 2000,
    }
    # Caller-supplied retry settings override the defaults key-by-key.
    if retry_policy:
        self._retry_policy.update(retry_policy)

    if self._builder.session_id is None:
        # Generate a unique session ID for this client. This UUID must be unique to allow
        # concurrent Spark sessions of the same user. If the channel is closed, creating
        # a new client will create a new session ID.
        self._session_id = str(uuid.uuid4())
    else:
        # Use the pre-defined session ID.
        self._session_id = str(self._builder.session_id)

    # User-id precedence: connection string > constructor argument > $USER env.
    if self._builder.userId is not None:
        self._user_id = self._builder.userId
    elif user_id is not None:
        self._user_id = user_id
    else:
        self._user_id = os.getenv("USER", None)

    # The channel must exist before the stub and artifact manager are built on it.
    self._channel = self._builder.toChannel()
    self._closed = False
    self._stub = grpc_lib.SparkConnectServiceStub(self._channel)
    self._artifact_manager = ArtifactManager(
        self._user_id, self._session_id, self._channel, self._builder.metadata()
    )
    self._use_reattachable_execute = use_reattachable_execute
    # Configure logging for the SparkConnect client.
|
|
705
|
+
|
|
706
|
+
def _retrying(self) -> "Retrying":
    """Build a fresh retry controller bound to this client's retry policy."""
    policy = self._retry_policy
    return Retrying(can_retry=SparkConnectClient.retry_exception, **policy)  # type: ignore
|
|
710
|
+
|
|
711
|
+
def disable_reattachable_execute(self) -> "SparkConnectClient":
    """Turn reattachable execution off; returns self so calls can be chained."""
    self._use_reattachable_execute = False
    return self
|
|
714
|
+
|
|
715
|
+
def enable_reattachable_execute(self) -> "SparkConnectClient":
    """Turn reattachable execution on; returns self so calls can be chained."""
    self._use_reattachable_execute = True
    return self
|
|
718
|
+
|
|
719
|
+
def register_udf(
    self,
    function: Any,
    return_type: "DataTypeOrString",
    name: Optional[str] = None,
    eval_type: int = PythonEvalType.SQL_BATCHED_UDF,
    deterministic: bool = True,
) -> str:
    """
    Create a temporary UDF in the session catalog on the other side. We generate a
    temporary name for it.
    """
    # Generate a unique name when the caller did not provide one.
    if name is None:
        name = f"fun_{uuid.uuid4().hex}"

    # Wrap the Python callable together with its output type and eval mode.
    udf = PythonUDF(
        output_type=return_type,
        eval_type=eval_type,
        func=function,
        python_ver="%d.%d" % sys.version_info[:2],
    )

    # Embed the UDF into an inline user-defined-function proto node.
    plan_udf = CommonInlineUserDefinedFunction(
        function_name=name,
        arguments=[],
        function=udf,
        deterministic=deterministic,
    ).to_plan_udf(self)

    # Issue the registration command; the response carries no payload we need.
    request = self._execute_plan_request_with_metadata()
    request.plan.command.register_function.CopyFrom(plan_udf)
    self._execute(request)
    return name
|
|
757
|
+
|
|
758
|
+
def register_udtf(
    self,
    function: Any,
    return_type: "DataTypeOrString",
    name: str,
    eval_type: int = PythonEvalType.SQL_TABLE_UDF,
    deterministic: bool = True,
) -> str:
    """
    Register a user-defined table function (UDTF) in the session catalog
    as a temporary function. The return type, if specified, must be a
    struct type and it's validated when building the proto message
    for the PythonUDTF.
    """
    # Bundle the Python callable with its return type and eval mode.
    table_fn = PythonUDTF(
        func=function,
        return_type=return_type,
        eval_type=eval_type,
        python_ver=get_python_ver(),
    )

    # Build the inline user-defined-table-function proto node.
    plan_fn = CommonInlineUserDefinedTableFunction(
        function_name=name,
        function=table_fn,
        deterministic=deterministic,
        arguments=[],
    ).udtf_plan(self)

    # Send the registration command; the response payload is ignored.
    request = self._execute_plan_request_with_metadata()
    request.plan.command.register_table_function.CopyFrom(plan_fn)
    self._execute(request)
    return name
|
|
791
|
+
|
|
792
|
+
def register_java(
    self,
    name: str,
    javaClassName: str,
    return_type: Optional["DataTypeOrString"] = None,
    aggregate: bool = False,
) -> None:
    """Register a Java UDF (or UDAF) by its class name as a temporary function."""
    # Without an explicit return type the server infers it and `aggregate`
    # selects UDF vs UDAF; with a return type only a plain UDF is built
    # (NOTE(review): `aggregate` is ignored in that branch — mirrors upstream).
    if return_type is None:
        java_udf = JavaUDF(class_name=javaClassName, aggregate=aggregate)
    else:
        java_udf = JavaUDF(class_name=javaClassName, output_type=return_type)

    plan_fn = CommonInlineUserDefinedFunction(
        function_name=name,
        function=java_udf,
    ).to_plan_judf(self)

    # Issue the registration command.
    request = self._execute_plan_request_with_metadata()
    request.plan.command.register_function.CopyFrom(plan_fn)
    self._execute(request)
|
|
813
|
+
|
|
814
|
+
def _build_metrics(self, metrics: "pb2.ExecutePlanResponse.Metrics") -> Iterator[PlanMetrics]:
    """Lazily convert a proto Metrics message into PlanMetrics objects."""
    return (
        PlanMetrics(
            m.name,
            m.plan_id,
            m.parent,
            [MetricValue(k, v.value, v.metric_type) for k, v in m.execution_metrics.items()],
        )
        for m in metrics.metrics
    )
|
|
824
|
+
|
|
825
|
+
def _resources(self) -> Dict[str, ResourceInformation]:
    """Fetch the server-side resource information via a GetResources command."""
    logger.info("Fetching the resources")
    cmd = pb2.Command()
    cmd.get_resources_command.SetInParent()
    _, properties = self.execute_command(cmd)
    return properties["get_resources_command_result"]
|
|
832
|
+
|
|
833
|
+
def _build_observed_metrics(
    self, metrics: Sequence["pb2.ExecutePlanResponse.ObservedMetrics"]
) -> Iterator[PlanObservedMetrics]:
    """Lazily wrap proto observed-metrics messages into PlanObservedMetrics."""
    return (PlanObservedMetrics(m.name, list(m.values)) for m in metrics)
|
|
837
|
+
|
|
838
|
+
def to_table_as_iterator(self, plan: pb2.Plan) -> Iterator[Union[StructType, "pa.Table"]]:
    """
    Return given plan as a PyArrow Table iterator.
    """
    logger.info(f"Executing plan {self._proto_to_string(plan)}")
    request = self._execute_plan_request_with_metadata()
    request.plan.CopyFrom(plan)
    # Pass the schema through as-is; wrap each record batch in its own Table.
    for item in self._execute_and_fetch_as_iterator(request):
        if isinstance(item, StructType):
            yield item
        elif isinstance(item, pa.RecordBatch):
            yield pa.Table.from_batches([item])
|
|
850
|
+
|
|
851
|
+
def to_table(self, plan: pb2.Plan) -> Tuple["pa.Table", Optional[StructType]]:
    """Execute the plan and collect the whole result as one PyArrow Table."""
    logger.info(f"Executing plan {self._proto_to_string(plan)}")
    request = self._execute_plan_request_with_metadata()
    request.plan.CopyFrom(plan)
    table, schema, _, _, _ = self._execute_and_fetch(request)
    # A table is always produced for plan execution (unlike commands).
    assert table is not None
    return table, schema
|
|
861
|
+
|
|
862
|
+
def to_pandas(self, plan: pb2.Plan) -> "pd.DataFrame":
    """
    Return given plan as a pandas DataFrame.
    """
    logger.info(f"Executing plan {self._proto_to_string(plan)}")
    req = self._execute_plan_request_with_metadata()
    req.plan.CopyFrom(plan)
    # Whether Arrow may free its buffers during conversion to save memory.
    (self_destruct_conf,) = self.get_config_with_defaults(
        ("spark.sql.execution.arrow.pyspark.selfDestruct.enabled", "false"),
    )
    self_destruct = cast(str, self_destruct_conf).lower() == "true"
    table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
        req, self_destruct=self_destruct
    )
    assert table is not None

    # Fall back to deriving the schema from the Arrow table if none was sent.
    schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True)
    assert schema is not None and isinstance(schema, StructType)

    # Rename columns to avoid duplicated column names.
    renamed_table = table.rename_columns([f"col_{i}" for i in range(table.num_columns)])
    if self_destruct:
        # Configure PyArrow to use as little memory as possible:
        # self_destruct - free columns as they are converted
        # split_blocks - create a separate Pandas block for each column
        # use_threads - convert one column at a time
        pandas_options = {
            "self_destruct": True,
            "split_blocks": True,
            "use_threads": False,
        }
        pdf = renamed_table.to_pandas(**pandas_options)
    else:
        pdf = renamed_table.to_pandas()
    # Restore the real column names from the Spark schema.
    pdf.columns = schema.names

    if len(pdf.columns) > 0:
        # Session time zone is only needed when timestamps are present.
        timezone: Optional[str] = None
        if any(_has_type(f.dataType, TimestampType) for f in schema.fields):
            (timezone,) = self.get_configs("spark.sql.session.timeZone")

        struct_in_pandas: Optional[str] = None
        error_on_duplicated_field_names: bool = False
        if any(_has_type(f.dataType, StructType) for f in schema.fields):
            (struct_in_pandas,) = self.get_config_with_defaults(
                ("spark.sql.execution.pandas.structHandlingMode", "legacy"),
            )

            # "legacy" means dict conversion that rejects duplicate field names.
            if struct_in_pandas == "legacy":
                error_on_duplicated_field_names = True
                struct_in_pandas = "dict"

        # Convert each column according to its Spark type (timestamps, structs, ...).
        pdf = pd.concat(
            [
                _create_converter_to_pandas(
                    field.dataType,
                    field.nullable,
                    timezone=timezone,
                    struct_in_pandas=struct_in_pandas,
                    error_on_duplicated_field_names=error_on_duplicated_field_names,
                )(pser)
                for (_, pser), field, pa_field in zip(pdf.items(), schema.fields, table.schema)
            ],
            axis="columns",
        )

    # Expose collected metrics to callers via DataFrame attrs.
    if len(metrics) > 0:
        pdf.attrs["metrics"] = metrics
    if len(observed_metrics) > 0:
        pdf.attrs["observed_metrics"] = observed_metrics
    return pdf
|
|
933
|
+
|
|
934
|
+
def _proto_to_string(self, p: google.protobuf.message.Message) -> str:
    """Serialize a proto message to a single-line textual representation.

    Parameters
    ----------
    p : google.protobuf.message.Message
        Generic Message type

    Returns
    -------
    Single line string of the serialized proto message.
    """
    return text_format.MessageToString(p, as_one_line=True)
|
|
948
|
+
|
|
949
|
+
def schema(self, plan: pb2.Plan) -> StructType:
    """Request and return the schema of the given plan from the server."""
    logger.info(f"Schema for plan: {self._proto_to_string(plan)}")
    result = self._analyze(method="schema", plan=plan).schema
    assert result is not None
    # Server side should populate the struct field which is the schema.
    assert isinstance(result, StructType)
    return result
|
|
959
|
+
|
|
960
|
+
def explain_string(self, plan: pb2.Plan, explain_mode: str = "extended") -> str:
    """Return the server-side explain output for the plan in the given mode."""
    logger.info(f"Explain (mode={explain_mode}) for plan {self._proto_to_string(plan)}")
    text = self._analyze(
        method="explain", plan=plan, explain_mode=explain_mode
    ).explain_string
    assert text is not None
    return text
|
|
970
|
+
|
|
971
|
+
def execute_command(
    self, command: pb2.Command
) -> Tuple[Optional[pd.DataFrame], Dict[str, Any]]:
    """Execute the command; return an optional result frame plus response properties."""
    logger.info(f"Execute command for command {self._proto_to_string(command)}")
    req = self._execute_plan_request_with_metadata()
    if self._user_id:
        req.user_context.user_id = self._user_id
    req.plan.command.CopyFrom(command)
    data, _, _, _, properties = self._execute_and_fetch(req)
    # Commands may or may not produce a tabular result.
    frame = data.to_pandas() if data is not None else None
    return (frame, properties)
|
|
987
|
+
|
|
988
|
+
def same_semantics(self, plan: pb2.Plan, other: pb2.Plan) -> bool:
    """Return True when the two plans are semantically equivalent on the server."""
    answer = self._analyze(method="same_semantics", plan=plan, other=other).is_same_semantics
    assert answer is not None
    return answer
|
|
995
|
+
|
|
996
|
+
def semantic_hash(self, plan: pb2.Plan) -> int:
    """Return the server-computed `hashCode` of the logical query plan."""
    answer = self._analyze(method="semantic_hash", plan=plan).semantic_hash
    assert answer is not None
    return answer
|
|
1003
|
+
|
|
1004
|
+
def close(self) -> None:
    """Shut down reattachable-execute workers and close the GRPC channel."""
    ExecutePlanResponseReattachableIterator.shutdown()
    self._channel.close()
    self._closed = True
|
|
1011
|
+
|
|
1012
|
+
@property
def is_closed(self) -> bool:
    """True once close() has been called on this client."""
    return self._closed
|
|
1018
|
+
|
|
1019
|
+
@property
def host(self) -> str:
    """Hostname this client connects to, as parsed by the channel builder."""
    return self._builder.host
|
|
1025
|
+
|
|
1026
|
+
@property
def token(self) -> Optional[str]:
    """Bearer token used for authentication, or None when not token-based."""
    return self._builder._token
|
|
1033
|
+
|
|
1034
|
+
def _execute_plan_request_with_metadata(self) -> pb2.ExecutePlanRequest:
    """Create an ExecutePlanRequest pre-filled with session, client and user metadata."""
    request = pb2.ExecutePlanRequest(
        session_id=self._session_id,
        client_type=self._builder.userAgent,
        tags=list(self.get_tags()),
    )
    if self._user_id:
        request.user_context.user_id = self._user_id
    return request
|
|
1043
|
+
|
|
1044
|
+
def _analyze_plan_request_with_metadata(self) -> pb2.AnalyzePlanRequest:
    """Create an AnalyzePlanRequest pre-filled with session, client and user metadata."""
    request = pb2.AnalyzePlanRequest()
    request.session_id = self._session_id
    request.client_type = self._builder.userAgent
    if self._user_id:
        request.user_context.user_id = self._user_id
    return request
|
|
1051
|
+
|
|
1052
|
+
def _analyze(self, method: str, **kwargs: Any) -> AnalyzeResult:
    """
    Call the analyze RPC of Spark Connect.

    Parameters
    ----------
    method : str
        Which analyze sub-request to send ("schema", "explain", "tree_string",
        "is_local", "is_streaming", "input_files", "spark_version", "ddl_parse",
        "same_semantics", "semantic_hash", "persist", "unpersist",
        "get_storage_level").
    **kwargs : Any
        Method-specific arguments: `plan`, `other`, `explain_mode`, `level`,
        `ddl_string`, `relation`, `storage_level`, `blocking`.

    Returns
    -------
    The result of the analyze call.
    """
    req = self._analyze_plan_request_with_metadata()
    # Populate exactly one oneof field of the request based on `method`.
    if method == "schema":
        req.schema.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "explain":
        req.explain.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
        explain_mode = kwargs.get("explain_mode")
        if explain_mode not in ["simple", "extended", "codegen", "cost", "formatted"]:
            raise PySparkValueError(
                error_class="UNKNOWN_EXPLAIN_MODE",
                message_parameters={
                    "explain_mode": str(explain_mode),
                },
            )
        if explain_mode == "simple":
            req.explain.explain_mode = (
                pb2.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_SIMPLE
            )
        elif explain_mode == "extended":
            req.explain.explain_mode = (
                pb2.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_EXTENDED
            )
        elif explain_mode == "cost":
            req.explain.explain_mode = (
                pb2.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_COST
            )
        elif explain_mode == "codegen":
            req.explain.explain_mode = (
                pb2.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_CODEGEN
            )
        else:  # formatted
            req.explain.explain_mode = (
                pb2.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_FORMATTED
            )
    elif method == "tree_string":
        req.tree_string.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
        # Depth is optional; only set when it is a (truthy) int.
        level = kwargs.get("level")
        if level and isinstance(level, int):
            req.tree_string.level = level
    elif method == "is_local":
        req.is_local.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "is_streaming":
        req.is_streaming.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "input_files":
        req.input_files.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "spark_version":
        req.spark_version.SetInParent()
    elif method == "ddl_parse":
        req.ddl_parse.ddl_string = cast(str, kwargs.get("ddl_string"))
    elif method == "same_semantics":
        req.same_semantics.target_plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
        req.same_semantics.other_plan.CopyFrom(cast(pb2.Plan, kwargs.get("other")))
    elif method == "semantic_hash":
        req.semantic_hash.plan.CopyFrom(cast(pb2.Plan, kwargs.get("plan")))
    elif method == "persist":
        req.persist.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation")))
        if kwargs.get("storage_level", None) is not None:
            storage_level = cast(StorageLevel, kwargs.get("storage_level"))
            req.persist.storage_level.CopyFrom(storage_level_to_proto(storage_level))
    elif method == "unpersist":
        req.unpersist.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation")))
        if kwargs.get("blocking", None) is not None:
            req.unpersist.blocking = cast(bool, kwargs.get("blocking"))
    elif method == "get_storage_level":
        req.get_storage_level.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation")))
    else:
        raise PySparkValueError(
            error_class="UNSUPPORTED_OPERATION",
            message_parameters={
                "operation": method,
            },
        )

    try:
        # AnalyzePlan is a unary RPC; the generic retry helper is sufficient.
        for attempt in self._retrying():
            with attempt:
                resp = self._stub.AnalyzePlan(req, metadata=self._builder.metadata())
                if resp.session_id != self._session_id:
                    raise SparkConnectException(
                        "Received incorrect session identifier for request:"
                        f"{resp.session_id} != {self._session_id}"
                    )
                return AnalyzeResult.fromProto(resp)
        # Retrying() exhausting without returning or raising should not happen.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
|
|
1145
|
+
|
|
1146
|
+
def _execute(self, req: pb2.ExecutePlanRequest) -> None:
    """
    Execute the passed request `req` and drop all results.

    Parameters
    ----------
    req : pb2.ExecutePlanRequest
        Proto representation of the plan.

    """
    logger.info("Execute")

    def check_response(b: pb2.ExecutePlanResponse) -> None:
        # Every response must echo the session this client created.
        if b.session_id != self._session_id:
            raise SparkConnectException(
                "Received incorrect session identifier for request: "
                f"{b.session_id} != {self._session_id}"
            )

    try:
        if self._use_reattachable_execute:
            # Don't use retryHandler - own retry handling is inside.
            for b in ExecutePlanResponseReattachableIterator(
                req, self._stub, self._retry_policy, self._builder.metadata()
            ):
                check_response(b)
        else:
            for attempt in self._retrying():
                with attempt:
                    for b in self._stub.ExecutePlan(req, metadata=self._builder.metadata()):
                        check_response(b)
    except Exception as error:
        self._handle_error(error)
|
|
1180
|
+
|
|
1181
|
+
def _execute_and_fetch_as_iterator(
    self, req: pb2.ExecutePlanRequest
) -> Iterator[
    Union[
        "pa.RecordBatch",
        StructType,
        PlanMetrics,
        PlanObservedMetrics,
        Dict[str, Any],
    ]
]:
    """Execute `req` and stream back every piece of the response.

    Yields Arrow record batches, the result schema, plan/observed metrics,
    and command-result dicts, in server response order.
    """
    logger.info("ExecuteAndFetchAsIterator")

    def handle_response(
        b: pb2.ExecutePlanResponse,
    ) -> Iterator[
        Union[
            "pa.RecordBatch",
            StructType,
            PlanMetrics,
            PlanObservedMetrics,
            Dict[str, Any],
        ]
    ]:
        # Every response must belong to the session this client created.
        if b.session_id != self._session_id:
            raise SparkConnectException(
                "Received incorrect session identifier for request: "
                f"{b.session_id} != {self._session_id}"
            )
        # A single response message can carry several of the fields below;
        # each present field is surfaced to the caller.
        if b.HasField("metrics"):
            logger.debug("Received metric batch.")
            yield from self._build_metrics(b.metrics)
        if b.observed_metrics:
            logger.debug("Received observed metric batch.")
            yield from self._build_observed_metrics(b.observed_metrics)
        if b.HasField("schema"):
            logger.debug("Received the schema.")
            dt = types.proto_schema_to_pyspark_data_type(b.schema)
            assert isinstance(dt, StructType)
            yield dt
        if b.HasField("sql_command_result"):
            logger.debug("Received the SQL command result.")
            yield {"sql_command_result": b.sql_command_result.relation}
        if b.HasField("write_stream_operation_start_result"):
            field = "write_stream_operation_start_result"
            yield {field: b.write_stream_operation_start_result}
        if b.HasField("streaming_query_command_result"):
            yield {"streaming_query_command_result": b.streaming_query_command_result}
        if b.HasField("streaming_query_manager_command_result"):
            cmd_result = b.streaming_query_manager_command_result
            yield {"streaming_query_manager_command_result": cmd_result}
        if b.HasField("get_resources_command_result"):
            # Convert the proto resource map into ResourceInformation objects.
            resources = {}
            for key, resource in b.get_resources_command_result.resources.items():
                name = resource.name
                addresses = [address for address in resource.addresses]
                resources[key] = ResourceInformation(name, addresses)
            yield {"get_resources_command_result": resources}
        if b.HasField("arrow_batch"):
            logger.debug(
                f"Received arrow batch rows={b.arrow_batch.row_count} "
                f"size={len(b.arrow_batch.data)}"
            )

            # Each arrow_batch payload is a complete Arrow IPC stream.
            with pa.ipc.open_stream(b.arrow_batch.data) as reader:
                for batch in reader:
                    assert isinstance(batch, pa.RecordBatch)
                    yield batch

    try:
        if self._use_reattachable_execute:
            # Don't use retryHandler - own retry handling is inside.
            generator = ExecutePlanResponseReattachableIterator(
                req, self._stub, self._retry_policy, self._builder.metadata()
            )
            for b in generator:
                yield from handle_response(b)
        else:
            for attempt in self._retrying():
                with attempt:
                    for b in self._stub.ExecutePlan(req, metadata=self._builder.metadata()):
                        yield from handle_response(b)
    except Exception as error:
        self._handle_error(error)
|
|
1265
|
+
|
|
1266
|
+
def _execute_and_fetch(
    self, req: pb2.ExecutePlanRequest, self_destruct: bool = False
) -> Tuple[
    Optional["pa.Table"],
    Optional[StructType],
    List[PlanMetrics],
    List[PlanObservedMetrics],
    Dict[str, Any],
]:
    """Execute the plan and eagerly fetch the full result set.

    Parameters
    ----------
    req : pb2.ExecutePlanRequest
        The fully built execute-plan request to send to the server.
    self_destruct : bool
        When True, each Arrow column is copied into its own allocation so
        the resulting table exclusively owns its buffers, making
        column-wise memory release effective later on.

    Returns
    -------
    Tuple of (Arrow table, or None when no batches arrived; schema;
    plan metrics; observed metrics; extra properties accumulated from
    the streamed responses).
    """
    logger.info("ExecuteAndFetch")

    observed_metrics: List[PlanObservedMetrics] = []
    metrics: List[PlanMetrics] = []
    batches: List[pa.RecordBatch] = []
    schema: Optional[StructType] = None
    properties: Dict[str, Any] = {}

    for response in self._execute_and_fetch_as_iterator(req):
        if isinstance(response, StructType):
            schema = response
        elif isinstance(response, pa.RecordBatch):
            batches.append(response)
        elif isinstance(response, PlanMetrics):
            metrics.append(response)
        elif isinstance(response, PlanObservedMetrics):
            observed_metrics.append(response)
        elif isinstance(response, dict):
            # Use the mapping form of dict.update instead of update(**response):
            # the keyword-argument form fails on keys that are not valid Python
            # identifiers (and on non-string keys entirely).
            properties.update(response)
        else:
            raise PySparkValueError(
                error_class="UNKNOWN_RESPONSE",
                message_parameters={
                    "response": response,
                },
            )

    if len(batches) > 0:
        if self_destruct:
            results = []
            for batch in batches:
                # self_destruct frees memory column-wise, but Arrow record batches
                # are oriented row-wise, so copy each column into its own allocation.
                batch = pa.RecordBatch.from_arrays(
                    [
                        # This call actually reallocates the array
                        pa.concat_arrays([array])
                        for array in batch
                    ],
                    schema=batch.schema,
                )
                results.append(batch)
            table = pa.Table.from_batches(batches=results)
            # Ensure only the table has a reference to the batches, so that
            # self_destruct (if enabled) is effective.
            del results
            del batches
        else:
            table = pa.Table.from_batches(batches=batches)
        return table, schema, metrics, observed_metrics, properties
    else:
        return None, schema, metrics, observed_metrics, properties
|
|
1327
|
+
|
|
1328
|
+
def _config_request_with_metadata(self) -> "pb2.ConfigRequest":
    """Build a ConfigRequest pre-populated with session and user metadata."""
    request = pb2.ConfigRequest()
    request.session_id = self._session_id
    request.client_type = self._builder.userAgent
    if self._user_id:
        request.user_context.user_id = self._user_id
    return request
|
|
1335
|
+
|
|
1336
|
+
def get_configs(self, *keys: str) -> Tuple[Optional[str], ...]:
    """Fetch the current server-side values for the given config keys.

    Missing keys yield None in the returned tuple, in input order.
    """
    operation = pb2.ConfigRequest.Operation(get=pb2.ConfigRequest.Get(keys=keys))
    fetched = dict(self.config(operation).pairs)
    return tuple(fetched.get(key) for key in keys)
|
|
1340
|
+
|
|
1341
|
+
def get_config_with_defaults(
    self, *pairs: Tuple[str, Optional[str]]
) -> Tuple[Optional[str], ...]:
    """Fetch config values for (key, default) pairs, using the server-side
    default resolution. Results are returned in input order."""
    operation = pb2.ConfigRequest.Operation(
        get_with_default=pb2.ConfigRequest.GetWithDefault(
            pairs=[pb2.KeyValue(key=key, value=default) for key, default in pairs]
        )
    )
    fetched = dict(self.config(operation).pairs)
    return tuple(fetched.get(key) for key, _ in pairs)
|
|
1351
|
+
|
|
1352
|
+
def config(self, operation: "pb2.ConfigRequest.Operation") -> ConfigResult:
    """
    Call the config RPC of Spark Connect.

    Parameters
    ----------
    operation : pb2.ConfigRequest.Operation
        The config operation (get / set / unset / ...) to perform.

    Returns
    -------
    The parsed result of the config call.
    """
    request = self._config_request_with_metadata()
    request.operation.CopyFrom(operation)
    try:
        for attempt in self._retrying():
            with attempt:
                response = self._stub.Config(request, metadata=self._builder.metadata())
                # Guard against responses from a different session.
                if response.session_id != self._session_id:
                    raise SparkConnectException(
                        "Received incorrect session identifier for request:"
                        f"{response.session_id} != {self._session_id}"
                    )
                return ConfigResult.fromProto(response)
        # The retry loop exhausted without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
|
|
1380
|
+
|
|
1381
|
+
def _interrupt_request(
    self, interrupt_type: str, id_or_tag: Optional[str] = None
) -> "pb2.InterruptRequest":
    """Build an InterruptRequest for kind "all", "tag" or "operation".

    For "tag" and "operation", id_or_tag carries the tag / operation id.
    """
    request = pb2.InterruptRequest()
    request.session_id = self._session_id
    request.client_type = self._builder.userAgent
    if interrupt_type == "all":
        request.interrupt_type = pb2.InterruptRequest.InterruptType.INTERRUPT_TYPE_ALL
    elif interrupt_type == "tag":
        assert id_or_tag is not None
        request.interrupt_type = pb2.InterruptRequest.InterruptType.INTERRUPT_TYPE_TAG
        request.operation_tag = id_or_tag
    elif interrupt_type == "operation":
        assert id_or_tag is not None
        request.interrupt_type = pb2.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID
        request.operation_id = id_or_tag
    else:
        raise PySparkValueError(
            error_class="UNKNOWN_INTERRUPT_TYPE",
            message_parameters={
                "interrupt_type": str(interrupt_type),
            },
        )
    if self._user_id:
        request.user_context.user_id = self._user_id
    return request
|
|
1407
|
+
|
|
1408
|
+
def interrupt_all(self) -> Optional[List[str]]:
    """Interrupt every operation running in this session.

    Returns the ids of the interrupted operations on success.
    """
    request = self._interrupt_request("all")
    try:
        for attempt in self._retrying():
            with attempt:
                response = self._stub.Interrupt(request, metadata=self._builder.metadata())
                # Guard against responses from a different session.
                if response.session_id != self._session_id:
                    raise SparkConnectException(
                        "Received incorrect session identifier for request:"
                        f"{response.session_id} != {self._session_id}"
                    )
                return list(response.interrupted_ids)
        # The retry loop exhausted without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
|
|
1423
|
+
|
|
1424
|
+
def interrupt_tag(self, tag: str) -> Optional[List[str]]:
    """Interrupt all operations in this session carrying the given tag.

    Returns the ids of the interrupted operations on success.
    """
    request = self._interrupt_request("tag", tag)
    try:
        for attempt in self._retrying():
            with attempt:
                response = self._stub.Interrupt(request, metadata=self._builder.metadata())
                # Guard against responses from a different session.
                if response.session_id != self._session_id:
                    raise SparkConnectException(
                        "Received incorrect session identifier for request:"
                        f"{response.session_id} != {self._session_id}"
                    )
                return list(response.interrupted_ids)
        # The retry loop exhausted without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
|
|
1439
|
+
|
|
1440
|
+
def interrupt_operation(self, op_id: str) -> Optional[List[str]]:
    """Interrupt the single operation identified by op_id.

    Returns the ids of the interrupted operations on success.
    """
    request = self._interrupt_request("operation", op_id)
    try:
        for attempt in self._retrying():
            with attempt:
                response = self._stub.Interrupt(request, metadata=self._builder.metadata())
                # Guard against responses from a different session.
                if response.session_id != self._session_id:
                    raise SparkConnectException(
                        "Received incorrect session identifier for request:"
                        f"{response.session_id} != {self._session_id}"
                    )
                return list(response.interrupted_ids)
        # The retry loop exhausted without returning or raising.
        raise SparkConnectException("Invalid state during retry exception handling.")
    except Exception as error:
        self._handle_error(error)
|
|
1455
|
+
|
|
1456
|
+
def add_tag(self, tag: str) -> None:
    """Attach *tag* to the current thread's set of operation tags."""
    self._throw_if_invalid_tag(tag)
    # Thread-local storage: lazily create the per-thread tag set.
    if not hasattr(self.thread_local, "tags"):
        self.thread_local.tags = set()
    self.thread_local.tags.add(tag)
|
|
1461
|
+
|
|
1462
|
+
def remove_tag(self, tag: str) -> None:
    """Detach *tag* from the current thread's set of operation tags.

    Raises KeyError if the tag is not present (set.remove semantics).
    """
    self._throw_if_invalid_tag(tag)
    # Thread-local storage: lazily create the per-thread tag set.
    if not hasattr(self.thread_local, "tags"):
        self.thread_local.tags = set()
    self.thread_local.tags.remove(tag)
|
|
1467
|
+
|
|
1468
|
+
def get_tags(self) -> Set[str]:
    """Return the current thread's set of operation tags (creating it lazily)."""
    if not hasattr(self.thread_local, "tags"):
        self.thread_local.tags = set()
    return self.thread_local.tags
|
|
1472
|
+
|
|
1473
|
+
def clear_tags(self) -> None:
    """Reset the current thread's operation tags to an empty set."""
    self.thread_local.tags = set()
|
|
1475
|
+
|
|
1476
|
+
def _throw_if_invalid_tag(self, tag: str) -> None:
|
|
1477
|
+
"""
|
|
1478
|
+
Validate if a tag for ExecutePlanRequest.tags is valid. Throw ``ValueError`` if
|
|
1479
|
+
not.
|
|
1480
|
+
"""
|
|
1481
|
+
spark_job_tags_sep = ","
|
|
1482
|
+
if tag is None:
|
|
1483
|
+
raise ValueError("Spark Connect tag cannot be null.")
|
|
1484
|
+
if spark_job_tags_sep in tag:
|
|
1485
|
+
raise ValueError(f"Spark Connect tag cannot contain '{spark_job_tags_sep}'.")
|
|
1486
|
+
if len(tag) == 0:
|
|
1487
|
+
raise ValueError("Spark Connect tag cannot be an empty string.")
|
|
1488
|
+
|
|
1489
|
+
def _handle_error(self, error: Exception) -> NoReturn:
    """
    Handle errors that occur during RPC calls.

    Parameters
    ----------
    error : Exception
        An exception thrown during RPC calls.

    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    if isinstance(error, grpc.RpcError):
        self._handle_rpc_error(error)
    elif isinstance(error, ValueError):
        message = str(error)
        # A closed channel surfaces as a ValueError from grpc; translate it.
        if "Cannot invoke RPC" in message and "closed" in message:
            raise SparkConnectException(
                error_class="NO_ACTIVE_SESSION", message_parameters=dict()
            ) from None
    raise error
|
|
1510
|
+
|
|
1511
|
+
def _handle_rpc_error(self, rpc_error: "grpc.RpcError") -> NoReturn:
    """
    Error handling helper for dealing with GRPC Errors. On the server side, certain
    exceptions are enriched with additional RPC Status information. These are
    unpacked in this function and put into the exception.

    To avoid overloading the user with GRPC errors, this message explicitly
    swallows the error context from the call. This GRPC Error is logged however,
    and can be enabled.

    Parameters
    ----------
    rpc_error : grpc.RpcError
        RPC Error containing the details of the exception.

    Returns
    -------
    Throws the appropriate internal Python exception.
    """
    logger.exception("GRPC Error received")
    # We have to cast the value here because, a RpcError is a Call as well.
    # https://grpc.github.io/grpc/python/grpc.html#grpc.UnaryUnaryMultiCallable.__call__
    status = rpc_status.from_call(cast(grpc.Call, rpc_error))
    if not status:
        # No rich status attached: surface the raw gRPC error text.
        raise SparkConnectGrpcException(str(rpc_error)) from None
    for detail in status.details:
        if detail.Is(error_details_pb2.ErrorInfo.DESCRIPTOR):
            info = error_details_pb2.ErrorInfo()
            detail.Unpack(info)
            raise convert_exception(info, status.message) from None
    raise SparkConnectGrpcException(status.message) from None
|
|
1544
|
+
|
|
1545
|
+
def add_artifacts(self, *path: str, pyfile: bool, archive: bool, file: bool) -> None:
    """Forward artifact registration to the session's artifact manager."""
    manager = self._artifact_manager
    manager.add_artifacts(*path, pyfile=pyfile, archive=archive, file=file)
|
|
1547
|
+
|
|
1548
|
+
def copy_from_local_to_fs(self, local_path: str, dest_path: str) -> None:
    """Upload a local file to the remote file system via the artifact manager."""
    manager = self._artifact_manager
    manager._add_forward_to_fs_artifacts(local_path, dest_path)
|
|
1550
|
+
|
|
1551
|
+
def cache_artifact(self, blob: bytes) -> str:
    """Cache a binary blob on the server; returns its artifact identifier."""
    return self._artifact_manager.cache_artifact(blob)
|
|
1553
|
+
|
|
1554
|
+
|
|
1555
|
+
class RetryState:
    """
    Simple state helper that captures the state between retries of the exceptions. It
    keeps track of the last exception thrown and how many in total. When the task
    finishes successfully done() returns True.
    """

    def __init__(self) -> None:
        self._exception: Optional[BaseException] = None
        self._done = False
        self._count = 0

    def set_exception(self, exc: BaseException) -> None:
        # Remember the latest failure and count one more attempt.
        self._exception = exc
        self._count += 1

    def throw(self) -> None:
        # Re-raise the most recent failure; it is a programming error to call
        # this before any exception was recorded.
        exc = self._exception
        if exc is None:
            raise RuntimeError("No exception is set")
        raise exc

    def set_done(self) -> None:
        self._done = True

    def count(self) -> int:
        return self._count

    def done(self) -> bool:
        return self._done
|
|
1584
|
+
|
|
1585
|
+
|
|
1586
|
+
class AttemptManager:
    """
    Simple ContextManager that is used to capture the exception thrown inside the context.
    """

    def __init__(self, check: Callable[..., bool], retry_state: "RetryState") -> None:
        self._retry_state = retry_state
        self._can_retry = check

    def __enter__(self) -> None:
        pass

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> Optional[bool]:
        if not isinstance(exc_val, BaseException):
            # Clean exit: tell the surrounding retry loop we are finished.
            self._retry_state.set_done()
            return None
        if self._can_retry(exc_val) or isinstance(exc_val, RetryException):
            # Swallow the retryable exception and record it for later.
            self._retry_state.set_exception(exc_val)
            return True
        # Bubble up the exception.
        return False

    def is_first_try(self) -> bool:
        return self._retry_state._count == 0
|
|
1617
|
+
|
|
1618
|
+
|
|
1619
|
+
class Retrying:
    """
    This helper class is used as a generator together with a context manager to
    allow retrying exceptions in particular code blocks. The Retrying can be configured
    with a lambda function that is can be filtered what kind of exceptions should be
    retried.

    In addition, there are several parameters that are used to configure the exponential
    backoff behavior.

    An example to use this class looks like this:

    .. code-block:: python

        for attempt in Retrying(can_retry=lambda x: isinstance(x, TransientError)):
            with attempt:
                # do the work.

    """

    def __init__(
        self,
        max_retries: int,
        initial_backoff: int,
        max_backoff: int,
        backoff_multiplier: float,
        jitter: int,
        min_jitter_threshold: int,
        can_retry: Callable[..., bool] = lambda x: True,
        sleep: Callable[[float], None] = time.sleep,
    ) -> None:
        self._can_retry = can_retry
        self._max_retries = max_retries
        self._initial_backoff = initial_backoff
        self._max_backoff = max_backoff
        self._backoff_multiplier = backoff_multiplier
        self._jitter = jitter
        self._min_jitter_threshold = min_jitter_threshold
        self._sleep = sleep

    def __iter__(self) -> "Generator[AttemptManager, None, None]":
        """
        Generator function to wrap the exception producing code block.

        Returns
        -------
        A generator that yields the current attempt.
        """
        state = RetryState()
        pending_backoff: float = self._initial_backoff

        if self._max_retries < 0:
            raise ValueError("Can't have negative number of retries")

        while not state.done() and state.count() <= self._max_retries:
            if state.count() > 0:
                # Sleep before every attempt after the first, growing the
                # wait exponentially (capped) and adding jitter above the
                # configured threshold.
                wait_ms = pending_backoff
                pending_backoff = min(
                    self._max_backoff, pending_backoff * self._backoff_multiplier
                )
                if wait_ms >= self._min_jitter_threshold:
                    wait_ms += random.uniform(0, self._jitter)
                logger.debug(f"Retrying call after {wait_ms} ms sleep")
                self._sleep(wait_ms / 1000.0)
            yield AttemptManager(self._can_retry, state)

        if not state.done():
            # Exceeded number of retries, throw last exception we had
            state.throw()