snowpark-connect 0.20.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic. Click here for more details.
- snowflake/snowpark_connect/__init__.py +23 -0
- snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
- snowflake/snowpark_connect/column_name_handler.py +735 -0
- snowflake/snowpark_connect/config.py +576 -0
- snowflake/snowpark_connect/constants.py +47 -0
- snowflake/snowpark_connect/control_server.py +52 -0
- snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
- snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
- snowflake/snowpark_connect/empty_dataframe.py +18 -0
- snowflake/snowpark_connect/error/__init__.py +11 -0
- snowflake/snowpark_connect/error/error_mapping.py +6174 -0
- snowflake/snowpark_connect/error/error_utils.py +321 -0
- snowflake/snowpark_connect/error/exceptions.py +24 -0
- snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
- snowflake/snowpark_connect/execute_plan/utils.py +183 -0
- snowflake/snowpark_connect/expression/__init__.py +3 -0
- snowflake/snowpark_connect/expression/literal.py +90 -0
- snowflake/snowpark_connect/expression/map_cast.py +343 -0
- snowflake/snowpark_connect/expression/map_expression.py +293 -0
- snowflake/snowpark_connect/expression/map_extension.py +104 -0
- snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
- snowflake/snowpark_connect/expression/map_udf.py +142 -0
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
- snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
- snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
- snowflake/snowpark_connect/expression/map_window_function.py +258 -0
- snowflake/snowpark_connect/expression/typer.py +125 -0
- snowflake/snowpark_connect/includes/__init__.py +0 -0
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
- snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
- snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
- snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
- snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
- snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
- snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
- snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
- snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
- snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
- snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
- snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
- snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
- snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
- snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
- snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
- snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
- snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
- snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
- snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
- snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
- snowflake/snowpark_connect/proto/__init__.py +10 -0
- snowflake/snowpark_connect/proto/control_pb2.py +35 -0
- snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
- snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
- snowflake/snowpark_connect/relation/__init__.py +3 -0
- snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
- snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
- snowflake/snowpark_connect/relation/io_utils.py +76 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
- snowflake/snowpark_connect/relation/map_catalog.py +151 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
- snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
- snowflake/snowpark_connect/relation/map_extension.py +412 -0
- snowflake/snowpark_connect/relation/map_join.py +341 -0
- snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
- snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
- snowflake/snowpark_connect/relation/map_relation.py +253 -0
- snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
- snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
- snowflake/snowpark_connect/relation/map_show_string.py +50 -0
- snowflake/snowpark_connect/relation/map_sql.py +1874 -0
- snowflake/snowpark_connect/relation/map_stats.py +324 -0
- snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
- snowflake/snowpark_connect/relation/map_udtf.py +288 -0
- snowflake/snowpark_connect/relation/read/__init__.py +7 -0
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
- snowflake/snowpark_connect/relation/read/map_read.py +367 -0
- snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
- snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
- snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
- snowflake/snowpark_connect/relation/read/utils.py +155 -0
- snowflake/snowpark_connect/relation/stage_locator.py +161 -0
- snowflake/snowpark_connect/relation/utils.py +219 -0
- snowflake/snowpark_connect/relation/write/__init__.py +3 -0
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
- snowflake/snowpark_connect/relation/write/map_write.py +436 -0
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +75 -0
- snowflake/snowpark_connect/server.py +1136 -0
- snowflake/snowpark_connect/start_server.py +32 -0
- snowflake/snowpark_connect/tcm.py +8 -0
- snowflake/snowpark_connect/type_mapping.py +1003 -0
- snowflake/snowpark_connect/typed_column.py +94 -0
- snowflake/snowpark_connect/utils/__init__.py +3 -0
- snowflake/snowpark_connect/utils/artifacts.py +48 -0
- snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
- snowflake/snowpark_connect/utils/cache.py +84 -0
- snowflake/snowpark_connect/utils/concurrent.py +124 -0
- snowflake/snowpark_connect/utils/context.py +390 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
- snowflake/snowpark_connect/utils/interrupt.py +85 -0
- snowflake/snowpark_connect/utils/io_utils.py +35 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
- snowflake/snowpark_connect/utils/profiling.py +47 -0
- snowflake/snowpark_connect/utils/session.py +180 -0
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
- snowflake/snowpark_connect/utils/telemetry.py +513 -0
- snowflake/snowpark_connect/utils/udf_cache.py +392 -0
- snowflake/snowpark_connect/utils/udf_helper.py +328 -0
- snowflake/snowpark_connect/utils/udf_utils.py +310 -0
- snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
- snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
- snowflake/snowpark_connect/utils/xxhash64.py +247 -0
- snowflake/snowpark_connect/version.py +6 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
- snowpark_connect-0.20.2.dist-info/METADATA +37 -0
- snowpark_connect-0.20.2.dist-info/RECORD +879 -0
- snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
- snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
- snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1136 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
# Some content in this file is derived from Apache Spark. In accordance
|
|
6
|
+
# with Apache 2 license, the license for Apache Spark is as follows:
|
|
7
|
+
#
|
|
8
|
+
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
9
|
+
# contributor license agreements. See the NOTICE file distributed with
|
|
10
|
+
# this work for additional information regarding copyright ownership.
|
|
11
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
12
|
+
# (the "License"); you may not use this file except in compliance with
|
|
13
|
+
# the License. You may obtain a copy of the License at
|
|
14
|
+
#
|
|
15
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
16
|
+
#
|
|
17
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
18
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
19
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
20
|
+
# See the License for the specific language governing permissions and
|
|
21
|
+
# limitations under the License.
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
import atexit
|
|
25
|
+
import logging
|
|
26
|
+
import os
|
|
27
|
+
import pathlib
|
|
28
|
+
import socket
|
|
29
|
+
import tempfile
|
|
30
|
+
import threading
|
|
31
|
+
import urllib.parse
|
|
32
|
+
import zipfile
|
|
33
|
+
from concurrent import futures
|
|
34
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
35
|
+
|
|
36
|
+
import grpc
|
|
37
|
+
import jpype
|
|
38
|
+
import pyspark
|
|
39
|
+
import pyspark.sql.connect.proto.base_pb2 as proto_base
|
|
40
|
+
import pyspark.sql.connect.proto.base_pb2_grpc as proto_base_grpc
|
|
41
|
+
import pyspark.sql.connect.proto.common_pb2 as common_proto
|
|
42
|
+
import pyspark.sql.connect.proto.relations_pb2 as relations_proto
|
|
43
|
+
import pyspark.sql.connect.proto.types_pb2 as types_proto
|
|
44
|
+
from packaging import version
|
|
45
|
+
from pyspark import StorageLevel
|
|
46
|
+
from pyspark.conf import SparkConf
|
|
47
|
+
from pyspark.errors import PySparkValueError
|
|
48
|
+
from pyspark.sql.connect.client.core import ChannelBuilder
|
|
49
|
+
from pyspark.sql.connect.session import SparkSession
|
|
50
|
+
|
|
51
|
+
import snowflake.snowpark_connect
|
|
52
|
+
import snowflake.snowpark_connect.proto.control_pb2_grpc as control_grpc
|
|
53
|
+
import snowflake.snowpark_connect.tcm as tcm
|
|
54
|
+
from snowflake import snowpark
|
|
55
|
+
from snowflake.snowpark_connect.analyze_plan.map_tree_string import map_tree_string
|
|
56
|
+
from snowflake.snowpark_connect.config import route_config_proto
|
|
57
|
+
from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
|
|
58
|
+
from snowflake.snowpark_connect.control_server import ControlServicer
|
|
59
|
+
from snowflake.snowpark_connect.error.error_utils import build_grpc_error_response
|
|
60
|
+
from snowflake.snowpark_connect.execute_plan.map_execution_command import (
|
|
61
|
+
map_execution_command,
|
|
62
|
+
)
|
|
63
|
+
from snowflake.snowpark_connect.execute_plan.map_execution_root import (
|
|
64
|
+
map_execution_root,
|
|
65
|
+
)
|
|
66
|
+
from snowflake.snowpark_connect.relation.map_local_relation import map_local_relation
|
|
67
|
+
from snowflake.snowpark_connect.relation.map_relation import map_relation
|
|
68
|
+
from snowflake.snowpark_connect.relation.utils import get_semantic_string
|
|
69
|
+
from snowflake.snowpark_connect.resources_initializer import initialize_resources_async
|
|
70
|
+
from snowflake.snowpark_connect.type_mapping import (
|
|
71
|
+
parse_ddl_string,
|
|
72
|
+
snowpark_to_proto_type,
|
|
73
|
+
)
|
|
74
|
+
from snowflake.snowpark_connect.utils.artifacts import check_checksum, write_artifact
|
|
75
|
+
from snowflake.snowpark_connect.utils.cache import (
|
|
76
|
+
df_cache_map_get,
|
|
77
|
+
df_cache_map_pop,
|
|
78
|
+
df_cache_map_put_if_absent,
|
|
79
|
+
)
|
|
80
|
+
from snowflake.snowpark_connect.utils.context import (
|
|
81
|
+
clear_context_data,
|
|
82
|
+
get_session_id,
|
|
83
|
+
set_session_id,
|
|
84
|
+
set_spark_version,
|
|
85
|
+
)
|
|
86
|
+
from snowflake.snowpark_connect.utils.interrupt import (
|
|
87
|
+
interrupt_all_queries,
|
|
88
|
+
interrupt_queries_with_tag,
|
|
89
|
+
interrupt_query,
|
|
90
|
+
)
|
|
91
|
+
from snowflake.snowpark_connect.utils.profiling import profile_method
|
|
92
|
+
from snowflake.snowpark_connect.utils.session import (
|
|
93
|
+
configure_snowpark_session,
|
|
94
|
+
get_or_create_snowpark_session,
|
|
95
|
+
set_query_tags,
|
|
96
|
+
)
|
|
97
|
+
from snowflake.snowpark_connect.utils.snowpark_connect_logging import (
|
|
98
|
+
log_waring_once_storage_level,
|
|
99
|
+
logger,
|
|
100
|
+
)
|
|
101
|
+
from snowflake.snowpark_connect.utils.telemetry import (
|
|
102
|
+
SnowparkConnectNotImplementedError,
|
|
103
|
+
telemetry,
|
|
104
|
+
)
|
|
105
|
+
from snowflake.snowpark_connect.utils.xxhash64 import xxhash64_string
|
|
106
|
+
|
|
107
|
+
DEFAULT_PORT = 15002
|
|
108
|
+
|
|
109
|
+
# https://github.com/apache/spark/blob/v3.5.3/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/config/ConnectCommon.scala#L21
|
|
110
|
+
_SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE = 128 * 1024 * 1024
|
|
111
|
+
# TODO: Verify whether we want to configure it via env variables.
|
|
112
|
+
_SPARK_CONNECT_GRPC_MAX_METADATA_SIZE = 64 * 1024 # 64kb
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _handle_exception(context, e: Exception):
    """Report a failed request and hand the error back to the gRPC caller.

    The traceback of the exception currently being handled is printed, the
    failure is reported to telemetry, and then:

    * when ``tcm.TCM_MODE`` is truthy, ``e`` is re-raised as-is;
    * otherwise a status is built from ``e`` with
      ``build_grpc_error_response`` and passed to
      ``context.abort_with_status``.

    Args:
        context: the servicer context of the RPC that failed.
        e: the exception raised while serving the request.
    """
    import traceback

    # Must be called while the exception is still being handled (i.e. from an
    # ``except`` block), since print_exc() reads the active exception.
    traceback.print_exc()

    telemetry.report_request_failure(e)

    if tcm.TCM_MODE:
        # TODO: SNOW-2009834 gracefully return error back in TCM
        raise e

    # Imported only on the non-TCM path, after the re-raise above.
    from grpc_status import rpc_status

    error_status = build_grpc_error_response(e)
    context.abort_with_status(rpc_status.to_status(error_status))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class SnowflakeConnectServicer(proto_base_grpc.SparkConnectServiceServicer):
|
|
133
|
+
def __init__(
    self,
    log_request_fn: Optional[Callable[[bytearray], None]] = None,
) -> None:
    """Create the servicer and kick off background resource initialization.

    Args:
        log_request_fn: optional callback; when set, the RPC handlers pass it
            the serialized form of each incoming request.
    """
    self.log_request_fn = log_request_fn
    # Start the asynchronous initialization up front so that the first RPC
    # calls do not have to pay for it.
    initialize_resources_async()
|
|
140
|
+
|
|
141
|
+
@profile_method
def ExecutePlan(self, request: proto_base.ExecutePlanRequest, context):
    """Execute the plan in ``request`` and stream back the responses.

    This is a generator: it yields every response produced for the plan and
    then one final ``ExecutePlanResponse`` carrying ``result_complete``.

    Args:
        request: the Spark Connect execute-plan request.
        context: the servicer context, used to report failures.
    """
    logger.info("ExecutePlan")
    if self.log_request_fn is not None:
        self.log_request_fn(request.SerializeToString())

    # TODO: remove session id context when we host this in Snowflake server
    # set the thread-local context of session id
    clear_context_data()
    set_session_id(request.session_id)
    set_spark_version(request.client_type)
    telemetry.initialize_request_summary(request)

    set_query_tags(request.tags)

    # Default to an empty stream; plans that are neither "root" nor "command"
    # (or commands that return None) only produce the completion marker.
    responses = iter(())
    try:
        op_type = request.plan.WhichOneof("op_type")
        if op_type == "root":
            logger.info("ROOT")
            responses = map_execution_root(request)
        elif op_type == "command":
            logger.info("COMMAND")
            single_response = map_execution_command(request)
            if single_response is not None:
                responses = iter([single_response])

        yield from responses
        yield proto_base.ExecutePlanResponse(
            session_id=request.session_id,
            operation_id=SERVER_SIDE_SESSION_ID,
            result_complete=proto_base.ExecutePlanResponse.ResultComplete(),
        )
    except Exception as e:
        _handle_exception(context, e)
    finally:
        # Runs whether the stream completed or failed.
        telemetry.send_request_summary_telemetry()
|
|
182
|
+
|
|
183
|
+
@profile_method
|
|
184
|
+
def AnalyzePlan(self, request: proto_base.AnalyzePlanRequest, context):
|
|
185
|
+
"""Analyzes a query and returns a [[AnalyzeResponse]] containing metadata about the query."""
|
|
186
|
+
logger.info(f"AnalyzePlan: {request.WhichOneof('analyze')}")
|
|
187
|
+
if self.log_request_fn is not None:
|
|
188
|
+
self.log_request_fn(request.SerializeToString())
|
|
189
|
+
try:
|
|
190
|
+
# TODO: remove session id context when we host this in Snowflake server
|
|
191
|
+
# set the thread-local context of session id
|
|
192
|
+
clear_context_data()
|
|
193
|
+
set_session_id(request.session_id)
|
|
194
|
+
set_spark_version(request.client_type)
|
|
195
|
+
telemetry.initialize_request_summary(request)
|
|
196
|
+
match request.WhichOneof("analyze"):
|
|
197
|
+
case "schema":
|
|
198
|
+
snowpark_df = map_relation(request.schema.plan.root)
|
|
199
|
+
snowpark_schema: snowpark.types.StructType = snowpark_df.schema
|
|
200
|
+
schema = proto_base.AnalyzePlanResponse.Schema(
|
|
201
|
+
schema=types_proto.DataType(
|
|
202
|
+
**snowpark_to_proto_type(
|
|
203
|
+
snowpark_schema, snowpark_df._column_map, snowpark_df
|
|
204
|
+
)
|
|
205
|
+
)
|
|
206
|
+
)
|
|
207
|
+
return proto_base.AnalyzePlanResponse(
|
|
208
|
+
session_id=request.session_id,
|
|
209
|
+
schema=schema,
|
|
210
|
+
)
|
|
211
|
+
case "tree_string":
|
|
212
|
+
return map_tree_string(request)
|
|
213
|
+
case "is_local":
|
|
214
|
+
return proto_base.AnalyzePlanResponse(
|
|
215
|
+
session_id=request.session_id,
|
|
216
|
+
is_local=proto_base.AnalyzePlanResponse.IsLocal(is_local=False),
|
|
217
|
+
)
|
|
218
|
+
case "ddl_parse":
|
|
219
|
+
return proto_base.AnalyzePlanResponse(
|
|
220
|
+
session_id=request.session_id,
|
|
221
|
+
ddl_parse=proto_base.AnalyzePlanResponse.DDLParse(
|
|
222
|
+
parsed=parse_ddl_string(request.ddl_parse.ddl_string)
|
|
223
|
+
),
|
|
224
|
+
)
|
|
225
|
+
case "get_storage_level":
|
|
226
|
+
return proto_base.AnalyzePlanResponse(
|
|
227
|
+
session_id=request.session_id,
|
|
228
|
+
get_storage_level=proto_base.AnalyzePlanResponse.GetStorageLevel(
|
|
229
|
+
storage_level=common_proto.StorageLevel(
|
|
230
|
+
use_disk=True, use_memory=True
|
|
231
|
+
)
|
|
232
|
+
),
|
|
233
|
+
)
|
|
234
|
+
case "persist":
|
|
235
|
+
plan_id = request.persist.relation.common.plan_id
|
|
236
|
+
# cache the plan if it is not already in the map
|
|
237
|
+
|
|
238
|
+
df_cache_map_put_if_absent(
|
|
239
|
+
(request.session_id, plan_id),
|
|
240
|
+
lambda: map_relation(request.persist.relation),
|
|
241
|
+
materialize=True,
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
storage_level = request.persist.storage_level
|
|
245
|
+
if storage_level != StorageLevel.DISK_ONLY:
|
|
246
|
+
log_waring_once_storage_level(storage_level)
|
|
247
|
+
|
|
248
|
+
return proto_base.AnalyzePlanResponse(
|
|
249
|
+
session_id=request.session_id,
|
|
250
|
+
persist=proto_base.AnalyzePlanResponse.Persist(),
|
|
251
|
+
)
|
|
252
|
+
case "unpersist":
|
|
253
|
+
plan_id = request.persist.relation.common.plan_id
|
|
254
|
+
# unpersist the cached plan
|
|
255
|
+
df_cache_map_pop((request.session_id, plan_id))
|
|
256
|
+
|
|
257
|
+
return proto_base.AnalyzePlanResponse(
|
|
258
|
+
session_id=request.session_id,
|
|
259
|
+
unpersist=proto_base.AnalyzePlanResponse.Unpersist(),
|
|
260
|
+
)
|
|
261
|
+
case "explain":
|
|
262
|
+
# Snowflake only exposes simplified execution plans, similar to Spark's optimized logical plans.
|
|
263
|
+
# Snowpark provides the execution plan IFF the dataframe maps to a single query.
|
|
264
|
+
# TODO: Do we need to return a Spark-like plan?
|
|
265
|
+
snowpark_df = map_relation(request.explain.plan.root)
|
|
266
|
+
return proto_base.AnalyzePlanResponse(
|
|
267
|
+
session_id=request.session_id,
|
|
268
|
+
explain=proto_base.AnalyzePlanResponse.Explain(
|
|
269
|
+
explain_string=snowpark_df._explain_string()
|
|
270
|
+
),
|
|
271
|
+
)
|
|
272
|
+
case "spark_version":
|
|
273
|
+
return proto_base.AnalyzePlanResponse(
|
|
274
|
+
session_id=request.session_id,
|
|
275
|
+
spark_version=proto_base.AnalyzePlanResponse.SparkVersion(
|
|
276
|
+
version=pyspark.__version__
|
|
277
|
+
),
|
|
278
|
+
)
|
|
279
|
+
case "same_semantics":
|
|
280
|
+
target_queries_hash = xxhash64_string(
|
|
281
|
+
get_semantic_string(request.same_semantics.target_plan.root)
|
|
282
|
+
)
|
|
283
|
+
other_queries_hash = xxhash64_string(
|
|
284
|
+
get_semantic_string(request.same_semantics.other_plan.root)
|
|
285
|
+
)
|
|
286
|
+
return proto_base.AnalyzePlanResponse(
|
|
287
|
+
session_id=request.session_id,
|
|
288
|
+
same_semantics=proto_base.AnalyzePlanResponse.SameSemantics(
|
|
289
|
+
result=target_queries_hash == other_queries_hash
|
|
290
|
+
),
|
|
291
|
+
)
|
|
292
|
+
case "semantic_hash":
|
|
293
|
+
queries_str = get_semantic_string(request.semantic_hash.plan.root)
|
|
294
|
+
return proto_base.AnalyzePlanResponse(
|
|
295
|
+
session_id=request.session_id,
|
|
296
|
+
semantic_hash=proto_base.AnalyzePlanResponse.SemanticHash(
|
|
297
|
+
result=xxhash64_string(queries_str)
|
|
298
|
+
& 0x7FFFFFFF # need a 32 bit int here.
|
|
299
|
+
),
|
|
300
|
+
)
|
|
301
|
+
case "is_streaming":
|
|
302
|
+
return proto_base.AnalyzePlanResponse(
|
|
303
|
+
session_id=request.session_id,
|
|
304
|
+
is_streaming=proto_base.AnalyzePlanResponse.IsStreaming(
|
|
305
|
+
is_streaming=False
|
|
306
|
+
),
|
|
307
|
+
)
|
|
308
|
+
case "input_files":
|
|
309
|
+
files = []
|
|
310
|
+
if request.input_files.plan.root.HasField("read"):
|
|
311
|
+
files = _get_files_metadata(
|
|
312
|
+
request.input_files.plan.root.read.data_source
|
|
313
|
+
)
|
|
314
|
+
elif request.input_files.plan.root.HasField("join"):
|
|
315
|
+
left_files = _get_files_metadata(
|
|
316
|
+
request.input_files.plan.root.join.left.read.data_source
|
|
317
|
+
)
|
|
318
|
+
right_files = _get_files_metadata(
|
|
319
|
+
request.input_files.plan.root.join.right.read.data_source
|
|
320
|
+
)
|
|
321
|
+
files = left_files + right_files
|
|
322
|
+
return proto_base.AnalyzePlanResponse(
|
|
323
|
+
session_id=request.session_id,
|
|
324
|
+
input_files=proto_base.AnalyzePlanResponse.InputFiles(
|
|
325
|
+
files=list(set(files))
|
|
326
|
+
),
|
|
327
|
+
)
|
|
328
|
+
case _:
|
|
329
|
+
raise SnowparkConnectNotImplementedError(
|
|
330
|
+
f"ANALYZE PLAN NOT IMPLEMENTED:\n{request}"
|
|
331
|
+
)
|
|
332
|
+
except Exception as e:
|
|
333
|
+
_handle_exception(context, e)
|
|
334
|
+
finally:
|
|
335
|
+
telemetry.send_request_summary_telemetry()
|
|
336
|
+
|
|
337
|
+
@staticmethod
def Config(
    request: proto_base.ConfigRequest,
    context,
    options=(),
    channel_credentials=None,
    call_credentials=None,
    insecure=False,
    compression=None,
    wait_for_ready=None,
    timeout=None,
    metadata=None,
):
    """Update or fetch configuration values and return the result.

    The request is delegated to ``route_config_proto`` together with the
    Snowpark session obtained from ``get_or_create_snowpark_session``. Only
    ``request`` and ``context`` are used by this method; the remaining
    keyword parameters are accepted but not read here.

    Any exception is routed through ``_handle_exception``; request-summary
    telemetry is sent in all cases.
    """
    logger.info("Config")
    try:
        telemetry.initialize_request_summary(request)
        snowpark_session = get_or_create_snowpark_session()
        return route_config_proto(request, snowpark_session)
    except Exception as e:
        _handle_exception(context, e)
    finally:
        telemetry.send_request_summary_telemetry()
|
|
359
|
+
|
|
360
|
+
def AddArtifacts(self, request_iterator, context):
|
|
361
|
+
"""Add artifacts to the session and returns a [[AddArtifactsResponse]] containing metadata about
|
|
362
|
+
the added artifacts.
|
|
363
|
+
"""
|
|
364
|
+
logger.info("AddArtifacts")
|
|
365
|
+
session: snowpark.Session = get_or_create_snowpark_session()
|
|
366
|
+
filenames: dict[str, str] = {}
|
|
367
|
+
response: dict[str, proto_base.AddArtifactsResponse.ArtifactSummary] = {}
|
|
368
|
+
# Store accumulated data for local relation cache
|
|
369
|
+
cache_data: dict[str, bytearray] = {}
|
|
370
|
+
|
|
371
|
+
def _try_handle_local_relation(artifact_name: str, data: bytes):
|
|
372
|
+
"""
|
|
373
|
+
Attempt to deserialize the artifact data to a LocalRelation protobuf message.
|
|
374
|
+
LocalRelation messages represent in-memory data that should be materialized
|
|
375
|
+
in temporary table in Snowflake rather than stored as file artifact.
|
|
376
|
+
- If successful: creates a temporary table and caches the DataFrame in `df_cache_map`
|
|
377
|
+
- If unsuccessful: falls back to storing as a regular file artifact
|
|
378
|
+
"""
|
|
379
|
+
|
|
380
|
+
is_likely_local_relation = artifact_name.startswith(
|
|
381
|
+
"cache/"
|
|
382
|
+
) # heuristic to identify local relations
|
|
383
|
+
|
|
384
|
+
def _handle_regular_artifact():
|
|
385
|
+
filenames[artifact_name] = write_artifact(
|
|
386
|
+
session,
|
|
387
|
+
artifact_name,
|
|
388
|
+
data,
|
|
389
|
+
overwrite=True,
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
if is_likely_local_relation:
|
|
393
|
+
try:
|
|
394
|
+
l_relation = relations_proto.LocalRelation()
|
|
395
|
+
l_relation.ParseFromString(data)
|
|
396
|
+
relation = relations_proto.Relation(local_relation=l_relation)
|
|
397
|
+
df_cache_map_put_if_absent(
|
|
398
|
+
(get_session_id(), artifact_name.replace("cache/", "")),
|
|
399
|
+
lambda: map_local_relation(relation), # noqa: B023
|
|
400
|
+
materialize=True,
|
|
401
|
+
)
|
|
402
|
+
except Exception:
|
|
403
|
+
# fallback - treat as regular artifact
|
|
404
|
+
_handle_regular_artifact()
|
|
405
|
+
else:
|
|
406
|
+
# Not a LocalRelation - treat as regular artifact
|
|
407
|
+
_handle_regular_artifact()
|
|
408
|
+
|
|
409
|
+
# Spark sends artifacts as iterators that are either chunked or a full batch.
|
|
410
|
+
#
|
|
411
|
+
# Chunked artifacts start with a "begin_chunk" followed by a series of "chunk"
|
|
412
|
+
# messages. The "chunk" messages do not contain a name, so we store the name
|
|
413
|
+
# in `current_name` so we can append all the chunks to the same object.
|
|
414
|
+
# Chunked artifacts are written incrementally as gzip files to reduce memory
|
|
415
|
+
# issues.
|
|
416
|
+
#
|
|
417
|
+
# Batch artifacts are sent as a single "batch" message containing a list of
|
|
418
|
+
# artifacts. We do not need to keep track of the name since it is included in
|
|
419
|
+
# each artifact.
|
|
420
|
+
current_name: str = ""
|
|
421
|
+
for request in request_iterator:
|
|
422
|
+
clear_context_data()
|
|
423
|
+
set_session_id(request.session_id)
|
|
424
|
+
set_spark_version(request.client_type)
|
|
425
|
+
match request.WhichOneof("payload"):
|
|
426
|
+
case "begin_chunk":
|
|
427
|
+
current_name = request.begin_chunk.name
|
|
428
|
+
assert (
|
|
429
|
+
current_name not in filenames
|
|
430
|
+
), "Duplicate artifact name found."
|
|
431
|
+
|
|
432
|
+
if current_name.startswith("cache/"):
|
|
433
|
+
cache_data[current_name] = bytearray(
|
|
434
|
+
request.begin_chunk.initial_chunk.data
|
|
435
|
+
)
|
|
436
|
+
else:
|
|
437
|
+
filenames[current_name] = write_artifact(
|
|
438
|
+
session,
|
|
439
|
+
current_name,
|
|
440
|
+
request.begin_chunk.initial_chunk.data,
|
|
441
|
+
overwrite=True,
|
|
442
|
+
)
|
|
443
|
+
response[
|
|
444
|
+
current_name
|
|
445
|
+
] = proto_base.AddArtifactsResponse.ArtifactSummary(
|
|
446
|
+
name=current_name,
|
|
447
|
+
is_crc_successful=check_checksum(
|
|
448
|
+
request.begin_chunk.initial_chunk.data,
|
|
449
|
+
request.begin_chunk.initial_chunk.crc,
|
|
450
|
+
),
|
|
451
|
+
)
|
|
452
|
+
case "chunk":
|
|
453
|
+
if current_name.startswith("cache/"):
|
|
454
|
+
cache_data[current_name].extend(request.chunk.data)
|
|
455
|
+
else:
|
|
456
|
+
assert filenames[current_name] == write_artifact(
|
|
457
|
+
session, current_name, request.chunk.data
|
|
458
|
+
), "Artifact staging error."
|
|
459
|
+
|
|
460
|
+
response[
|
|
461
|
+
current_name
|
|
462
|
+
] = proto_base.AddArtifactsResponse.ArtifactSummary(
|
|
463
|
+
name=current_name,
|
|
464
|
+
is_crc_successful=response[current_name].is_crc_successful
|
|
465
|
+
and check_checksum(request.chunk.data, request.chunk.crc),
|
|
466
|
+
)
|
|
467
|
+
case "batch":
|
|
468
|
+
for artifact in request.batch.artifacts:
|
|
469
|
+
data = artifact.data.data
|
|
470
|
+
|
|
471
|
+
_try_handle_local_relation(artifact.name, data)
|
|
472
|
+
response[
|
|
473
|
+
artifact.name
|
|
474
|
+
] = proto_base.AddArtifactsResponse.ArtifactSummary(
|
|
475
|
+
name=artifact.name,
|
|
476
|
+
is_crc_successful=check_checksum(
|
|
477
|
+
artifact.data.data, artifact.data.crc
|
|
478
|
+
),
|
|
479
|
+
)
|
|
480
|
+
case _:
|
|
481
|
+
raise ValueError(
|
|
482
|
+
f"Unexpected payload type in AddArtifacts: {request.WhichOneof('payload')}"
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
for name, data in cache_data.items():
|
|
486
|
+
_try_handle_local_relation(name, bytes(data))
|
|
487
|
+
|
|
488
|
+
for (name, filepath) in filenames.items():
|
|
489
|
+
session.file.put(
|
|
490
|
+
filepath,
|
|
491
|
+
session.get_session_stage(),
|
|
492
|
+
auto_compress=False,
|
|
493
|
+
overwrite=True,
|
|
494
|
+
source_compression="GZIP" if name.endswith(".gz") else "NONE",
|
|
495
|
+
)
|
|
496
|
+
|
|
497
|
+
if name.startswith("cache"):
|
|
498
|
+
continue
|
|
499
|
+
|
|
500
|
+
# Remove temporary stored files which are put on the stage
|
|
501
|
+
os.remove(filepath)
|
|
502
|
+
|
|
503
|
+
# Add only files marked to be used in user generated Python UDFs.
|
|
504
|
+
cached_name = f"{session.get_session_stage()}/{filepath.split('/')[-1]}"
|
|
505
|
+
if not name.startswith("pyfiles") and cached_name in session._python_files:
|
|
506
|
+
session._python_files.remove(cached_name)
|
|
507
|
+
elif name.startswith("pyfiles"):
|
|
508
|
+
session._python_files.add(cached_name)
|
|
509
|
+
|
|
510
|
+
if not name.startswith("pyfiles"):
|
|
511
|
+
session._import_files.add(cached_name)
|
|
512
|
+
|
|
513
|
+
return proto_base.AddArtifactsResponse(artifacts=list(response.values()))
|
|
514
|
+
|
|
515
|
+
def ArtifactStatus(self, request, context):
    """Report, for every requested artifact name, whether it already exists.

    A name exists when it is a ``cache/`` local relation found in the
    DataFrame cache, or when its last path component is contained in the name
    of any file under this session's local staging directory.
    """
    logger.info("ArtifactStatus")
    clear_context_data()
    set_session_id(request.session_id)
    set_spark_version(request.client_type)
    session: snowpark.Session = get_or_create_snowpark_session()
    staging_dir = (
        f"/tmp/sas-{session.session_id}/"
        if os.name != "nt"
        else f"{tempfile.gettempdir()}/sas-{session.session_id}/"
    )

    def _is_local_relation_cached(name: str) -> bool:
        if not name.startswith("cache/"):
            return False
        relation_key = name.replace("cache/", "")
        return df_cache_map_get((get_session_id(), relation_key)) is not None

    staged_files = [
        entry for _, _, entries in os.walk(staging_dir) for entry in entries
    ]

    # With no staged files `any(...)` is False, so only the cache lookup counts.
    statuses = {
        name: proto_base.ArtifactStatusesResponse.ArtifactStatus(
            exists=(
                _is_local_relation_cached(name)
                or any(name.split("/")[-1] in staged for staged in staged_files)
            )
        )
        for name in request.names
    }
    return proto_base.ArtifactStatusesResponse(statuses=statuses)
|
|
556
|
+
|
|
557
|
+
def Interrupt(self, request: proto_base.InterruptRequest, context):
    """Interrupt running executions selected by ``request.interrupt_type``.

    SAS doesn't support operation ids yet (a constant SERVER_SIDE_SESSION_ID
    mock is used), so Snowflake query ids stand in for them:
    - ``interrupted_ids`` in the response holds query ids of interrupted jobs.
    - INTERRUPT_TYPE_OPERATION_ID expects a Snowflake query id in
      ``request.operation_id``.

    Failures are passed to ``_handle_exception``; the request summary
    telemetry is sent in every case.
    """
    logger.info("Interrupt")
    telemetry.initialize_request_summary(request)
    try:
        requested = request.interrupt_type
        if requested == proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_ALL:
            interrupted_ids = interrupt_all_queries()
        elif requested == proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_TAG:
            interrupted_ids = interrupt_queries_with_tag(request.operation_tag)
        elif (
            requested
            == proto_base.InterruptRequest.InterruptType.INTERRUPT_TYPE_OPERATION_ID
        ):
            interrupted_ids = interrupt_query(request.operation_id)
        else:
            raise SnowparkConnectNotImplementedError(
                f"INTERRUPT NOT IMPLEMENTED:\n{request}"
            )

        return proto_base.InterruptResponse(
            session_id=request.session_id,
            interrupted_ids=interrupted_ids,
        )
    except Exception as e:
        _handle_exception(context, e)
    finally:
        telemetry.send_request_summary_telemetry()
|
|
586
|
+
|
|
587
|
+
def ReattachExecute(self, request: proto_base.ReattachExecuteRequest, context):
    """Reattach to an existing reattachable execution.

    Reattaching is not supported by this server: the call is logged and always
    raises ``SnowparkConnectNotImplementedError``, asking the client to
    resubmit the request.
    """
    logger.info("ReattachExecute")
    message = "Spark client has detached, please resubmit request. In a future version, the server will be support the reattach."
    raise SnowparkConnectNotImplementedError(message)
|
|
597
|
+
|
|
598
|
+
def ReleaseExecute(self, request: proto_base.ReleaseExecuteRequest, context):
    """Acknowledge the release of a reattachable execution.

    Nothing is released here: the response echoes the request's session id
    together with the constant ``SERVER_SIDE_SESSION_ID`` as the operation id.
    Any failure is passed to ``_handle_exception``.
    """
    try:
        logger.info("ReleaseExecute")
        release_response = proto_base.ReleaseExecuteResponse(
            session_id=request.session_id,
            operation_id=SERVER_SIDE_SESSION_ID,
        )
    except Exception as e:
        _handle_exception(context, e)
    else:
        return release_response
|
|
612
|
+
|
|
613
|
+
# TODO: These are required in Spark 4.x.
|
|
614
|
+
# def ReleaseSession(self, request, context):
|
|
615
|
+
# """Release a session.
|
|
616
|
+
# All the executions in the session will be released. Any further requests for the session with
|
|
617
|
+
# that session_id for the given user_id will fail. If the session didn't exist or was already
|
|
618
|
+
# released, this is a noop.
|
|
619
|
+
# """
|
|
620
|
+
# logger.info("ReleaseSession")
|
|
621
|
+
# return super().ReleaseSession(request, context)
|
|
622
|
+
#
|
|
623
|
+
# def FetchErrorDetails(self, request, context):
|
|
624
|
+
# """FetchErrorDetails retrieves the matched exception with details based on a provided error id."""
|
|
625
|
+
# logger.info("FetchErrorDetails")
|
|
626
|
+
# return super().FetchErrorDetails(request, context)
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
# Global state related to server connection

# Set by _serve() once the gRPC server has started, and also when startup fails
# so that a waiting start_session() call is unblocked.
_server_running: threading.Event = threading.Event()
# True when _serve() hit an exception.
_server_error: bool = False
# Address the server binds to (see get_server_url()).
_server_url: Optional[str] = None
# URL clients connect to (see get_client_url()).
_client_url: Optional[str] = None
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
# Used to reset server global state to the initial blank slate state if error happens during server startup.
|
|
637
|
+
# Called after the startup error is caught and handled / logged etc.
|
|
638
|
+
def _reset_server_run_state():
    """Put the module-level server state back to its initial values."""
    global _server_error, _server_url, _client_url
    # The event is mutated in place, so it needs no `global` declaration.
    _server_running.clear()
    _server_error = False
    _server_url = _client_url = None
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
def _stop_server(stop_event: threading.Event, server: grpc.Server):
    """Block until ``stop_event`` is set, then stop ``server`` with no grace
    period and reset the module-level server state."""
    stop_event.wait()
    no_grace_period = 0
    server.stop(no_grace_period)
    _reset_server_run_state()
    logger.info("server stop sent")
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def _serve(
    stop_event: Optional[threading.Event] = None,
    session: Optional[snowpark.Session] = None,
):
    """Configure Snowpark, start the gRPC server and block until it terminates.

    Parameters:
        stop_event: When given, a daemon thread waits on it and stops the
            server once it is set.
        session: Snowpark session to use; one is obtained through
            ``get_or_create_snowpark_session()`` when omitted.

    In TCM mode only the Snowpark configuration is done and no gRPC server is
    started. Any startup error is recorded in the module state, logged and
    re-raised. The telemetry queue is shut down on every exit path.
    """
    global _server_error
    # TODO: factor out the Snowflake connection code.
    try:
        config_snowpark()
        if session is not None:
            # If a session is passed in, explicitly call config session to be
            # consistent with sessions created under the hood.
            configure_snowpark_session(session)
        else:
            session = get_or_create_snowpark_session()
        if tcm.TCM_MODE:
            # No need to start grpc server in TCM
            return

        metadata_limit = _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE
        server_options = [
            ("grpc.max_receive_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
            ("grpc.max_metadata_size", metadata_limit),
            ("grpc.absolute_max_metadata_size", metadata_limit * 2),
        ]
        worker_pool = futures.ThreadPoolExecutor(max_workers=10)
        server = grpc.server(worker_pool, options=server_options)

        control_servicer = ControlServicer(session)
        connect_servicer = SnowflakeConnectServicer(
            control_servicer.log_spark_connect_batch
        )
        proto_base_grpc.add_SparkConnectServiceServicer_to_server(
            connect_servicer, server
        )
        control_grpc.add_ControlServiceServicer_to_server(control_servicer, server)

        server_url = get_server_url()
        server.add_insecure_port(server_url)
        logger.info(f"Starting Snowpark Connect server on {server_url}...")
        server.start()
        _server_running.set()
        logger.info("Snowpark Connect server started!")
        telemetry.send_server_started_telemetry()

        if stop_event is not None:
            # start a background thread to listen for stop event and terminate the server
            stopper = threading.Thread(
                target=_stop_server, args=(stop_event, server), daemon=True
            )
            stopper.start()
        server.wait_for_termination()
    except Exception as e:
        _server_error = True
        _server_running.set()  # unblock any client sessions
        missing_connection = (
            "Invalid connection_name 'spark-connect', known ones are " in str(e)
        )
        if missing_connection:
            logger.error(
                "Ensure 'spark-connect' connection config has been set correctly in connections.toml."
            )
        else:
            logger.error("Error starting up Snowpark Connect server", exc_info=True)
        raise e
    finally:
        # flush the telemetry queue if possible
        telemetry.shutdown()
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def _set_remote_url(remote_url: str):
    """Derive the server bind address and client URL from a Spark Connect URL.

    Parameters:
        remote_url: Either an ``sc://host[:port][/;params]`` URL or a
            ``unix:`` URL.

    For ``sc://`` URLs the bind address is ``host:port``. When the URL has no
    port, ``DEFAULT_PORT`` is used for both the port check and the bind
    address, so the server listens where the check looked. For ``unix:`` URLs
    the bind address is everything before the first ``/;``.

    The module globals are only assigned after the URL has been validated.

    Raises:
        RuntimeError: If the scheme is neither ``sc`` nor ``unix``, or if the
            TCP port is already in use.
    """
    global _server_url, _client_url
    parsed_url = urllib.parse.urlparse(remote_url)
    if parsed_url.scheme == "sc":
        server_url = parsed_url.netloc
        if parsed_url.port is None:
            # The bare netloc has no port to bind to; append the default.
            server_url = f"{server_url.rstrip(':')}:{DEFAULT_PORT}"
        server_port = parsed_url.port or DEFAULT_PORT
        _check_port_is_free(server_port)
    elif parsed_url.scheme == "unix":
        server_url = remote_url.split("/;")[0]
    else:
        raise RuntimeError(f"Invalid Snowpark Connect URL: {remote_url}")

    _client_url = remote_url
    _server_url = server_url
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def _set_server_tcp_port(server_port: int):
    """Point the server at ``[::]:<port>`` and clients at
    ``sc://127.0.0.1:<port>``, after checking that the port is free."""
    global _server_url, _client_url
    _check_port_is_free(server_port)
    _server_url, _client_url = (
        f"[::]:{server_port}",
        f"sc://127.0.0.1:{server_port}",
    )
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
def _check_port_is_free(port: int) -> None:
    """Raise ``RuntimeError`` if something accepts TCP connections on
    ``127.0.0.1:<port>``.

    The probe is a connect attempt with a one-second timeout; a successful
    connect means the port is taken.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.settimeout(1)
        connect_status = probe.connect_ex(("127.0.0.1", port))
        if connect_status == 0:
            raise RuntimeError(f"TCP port {port} is already in use")
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def _set_server_unix_domain_socket(path: str):
    """Use ``unix:<path>`` as both the server bind address and the client URL."""
    global _server_url, _client_url
    socket_url = f"unix:{path}"
    _server_url = socket_url
    _client_url = socket_url
|
|
749
|
+
|
|
750
|
+
|
|
751
|
+
def get_server_url() -> str:
    """Return the address the server binds to.

    Raises:
        RuntimeError: If no server URL has been set.
    """
    # Read-only access to the module global; no `global` declaration needed.
    if _server_url:
        return _server_url
    raise RuntimeError("Server URL not set")
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
def get_client_url() -> str:
    """Return the URL clients should connect to.

    Raises:
        RuntimeError: If no client URL has been set.
    """
    # Read-only access to the module global; no `global` declaration needed.
    if _client_url:
        return _client_url
    raise RuntimeError("Client URL not set")
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
def _make_unix_domain_socket() -> str:
    """Return a socket path inside a freshly created temporary directory.

    Removal of the socket file and its directory is registered to run at
    interpreter exit.
    """
    server_path = os.path.join(tempfile.mkdtemp(), "snowflake_sas_grpc.sock")
    atexit.register(_cleanup_unix_domain_socket, server_path)
    return server_path
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def _cleanup_unix_domain_socket(server_path: str) -> None:
    """Delete the socket file and then its parent directory, skipping whichever
    no longer exists."""
    removal_steps = (
        (server_path, os.remove),
        (os.path.dirname(server_path), os.rmdir),
    )
    for target, remove in removal_steps:
        if os.path.exists(target):
            remove(target)
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
class UnixDomainSocketChannelBuilder(ChannelBuilder):
    """
    Spark Connect gRPC channel builder for Unix domain sockets.

    The address is read from ``get_client_url()`` and must start with
    ``unix:/``. URL parameters are parsed into ``params``; the channel itself
    is an insecure gRPC channel on ``<scheme>:<path>`` of the real URL.
    """

    # We override this to enable compatibility with Spark 4.0
    host = None

    def __init__(self, channelOptions: Optional[List[Tuple[str, Any]]] = None) -> None:
        url: str = get_client_url()
        has_unix_prefix = url.startswith("unix:/")
        if not has_unix_prefix or len(url) < 7:
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": "The URL must start with 'unix://'. Please update the URL to follow the correct format, e.g., 'unix://unix_domain_socket_path'.",
                },
            )

        # Rewrite the URL to use http as the scheme so that we can leverage
        # Python's built-in parser to parse URL parameters
        self.url = urllib.parse.urlparse("http://" + url[6:])
        self.params: Dict[str, str] = {}
        self._extract_attributes()

        # Now parse the real unix domain socket URL
        self.url = urllib.parse.urlparse(url)

        metadata_limit = _SPARK_CONNECT_GRPC_MAX_METADATA_SIZE
        default_options = [
            ("grpc.max_send_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
            ("grpc.max_receive_message_length", _SPARK_CONNECT_GRPC_MAX_MESSAGE_SIZE),
            ("grpc.max_metadata_size", metadata_limit),
            ("grpc.absolute_max_metadata_size", 2 * metadata_limit),
        ]
        self._channel_options = (
            default_options
            if channelOptions is None
            else default_options + channelOptions
        )
        # For Spark 4.0 support, but also backwards compatible.
        self._params = self.params

    def _extract_attributes(self) -> None:
        """Extract attributes from parameters.

        This method was copied from
        https://github.com/apache/spark/blob/branch-3.5/python/pyspark/sql/connect/client/core.py

        This is required for Spark 4.0 support, since it is dropped in favor of moving
        the extraction logic into the constructor.
        """
        if self.url.params:
            for raw_param in self.url.params.split(";"):
                pieces = raw_param.split("=")
                if len(pieces) != 2:
                    raise PySparkValueError(
                        error_class="INVALID_CONNECT_URL",
                        message_parameters={
                            "detail": f"Parameter '{raw_param}' should be provided as a "
                            f"key-value pair separated by an equal sign (=). Please update "
                            f"the parameter to follow the correct format, e.g., 'key=value'.",
                        },
                    )
                key, raw_value = pieces
                self.params[key] = urllib.parse.unquote(raw_value)

        location = self.url.netloc.split(":")
        if len(location) > 2:
            raise PySparkValueError(
                error_class="INVALID_CONNECT_URL",
                message_parameters={
                    "detail": f"Target destination '{self.url.netloc}' should match the "
                    f"'<host>:<port>' pattern. Please update the destination to follow "
                    f"the correct format, e.g., 'hostname:port'.",
                },
            )

        self.host = location[0]
        if len(location) == 2:
            self.port = int(location[1])
        elif version.parse(pyspark.__version__) >= version.parse("4.0.0"):
            from pyspark.sql.connect.client.core import DefaultChannelBuilder

            self.port = DefaultChannelBuilder.default_port()
        else:
            self.port = ChannelBuilder.default_port()

    @property
    def endpoint(self) -> str:
        """gRPC target string: the scheme and path of the real socket URL."""
        parsed = self.url
        return f"{parsed.scheme}:{parsed.path}"

    def toChannel(self) -> grpc.Channel:
        """Open an insecure gRPC channel to the socket with the configured options."""
        return grpc.insecure_channel(self.endpoint, options=self._channel_options)
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
def config_snowpark() -> None:
    """
    Switch on the Snowpark context flags required by SAS.
    """
    # Enable structType. Require snowpark 1.27.0 or snowpark main branch after commit 888cec55c4
    from snowflake.snowpark import context as snowpark_context

    snowpark_context._use_structured_type_semantics = True
    snowpark_context._is_snowpark_connect_compatible_mode = True
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def start_jvm():
    """Start the JVM through jpype with the bundled jars on the classpath.

    The JVM is used to run the Spark parser and JDBC drivers, so it needs to be
    configured to support both. JDBC driver .jars are added using the CLASSPATH
    env var; the jars under ``includes/jars`` of this package are appended to
    the classpath here. JVM options are taken from the whitespace-separated
    ``JAVA_OPTS`` environment variable.

    Raises:
        RuntimeError: If a JVM is already running outside of TCM mode, since
            its parameters could no longer be controlled. In TCM mode an
            already running JVM makes this a no-op.
    """
    if jpype.isJVMStarted():
        if not tcm.TCM_MODE:
            raise RuntimeError(
                "JVM must not be running when starting the Spark Connect server"
            )
        # No-op if JVM is already started in TCM mode
        return

    package_file = snowflake.snowpark_connect.__file__
    pyspark_jars = pathlib.Path(package_file).parent / "includes/jars"

    if "dataframe_processor.zip" in str(pyspark_jars):
        # importlib.resource doesn't work when local stage package is used in TCM
        zip_path = pathlib.Path(package_file).parent.parent.parent
        temp_dir = tempfile.gettempdir()

        # Folder to extract (must end with '/')
        extract_folder = "snowflake/snowpark_connect/includes/jars/"

        with zipfile.ZipFile(zip_path, "r") as archive:
            jar_members = (
                entry
                for entry in archive.namelist()
                if entry.startswith(extract_folder)
            )
            for entry in jar_members:
                archive.extract(entry, path=temp_dir)

        pyspark_jars = pathlib.Path(temp_dir) / extract_folder

    for jar_path in pyspark_jars.glob("**/*.jar"):
        jpype.addClassPath(jar_path)

    # TODO: Should remove convertStrings, but it breaks the JDBC code.
    jvm_settings: list[str] = [
        option for option in os.environ.get("JAVA_OPTS", "").split() if option != ""
    ]
    jpype.startJVM(
        *jvm_settings,
        convertStrings=True,
    )
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
def start_session(
    is_daemon: bool = True,
    remote_url: Optional[str] = None,
    tcp_port: Optional[int] = None,
    unix_domain_socket: Optional[str] = None,
    stop_event: threading.Event = None,
    snowpark_session: Optional[snowpark.Session] = None,
    connection_parameters: Optional[Dict[str, str]] = None,
) -> threading.Thread | None:
    """
    Starts Spark Connect server connected to Snowflake. No-op if the Server is already running.

    Parameters:
        is_daemon (bool): Whether to run the server in a daemon thread. Use True to shut the server down
                          automatically when the main program (or test) finishes. Use False to run it in
                          the foreground as a stand-alone, long-running server.
        remote_url (Optional[str]): sc:// URL on which to start the Spark Connect server. Incompatible with
                                    the tcp_port and unix_domain_socket parameters.
        tcp_port (Optional[int]): TCP port on which to start the Spark Connect server. Incompatible with
                                  the remote_url and unix_domain_socket parameters.
        unix_domain_socket (Optional[str]): Path to the unix domain socket on which to start the server.
                                            Incompatible with the remote_url and tcp_port parameters.
        stop_event (Optional[threading.Event]): Stop the SAS server when stop_event.set() is called.
                                                Only works when is_daemon=True.
        snowpark_session: A Snowpark session to use for this connection; currently the only applicable use
                          is to pass in the session created by the stored proc environment.
        connection_parameters: Connection parameters used to create the Snowpark session. If provided,
                               `snowpark_session` must be None.

    Returns:
        The server thread when is_daemon is True; None when the server was already running or after a
        foreground server has terminated.
    """
    try:
        if os.environ.get("SPARK_ENV_LOADED"):
            raise RuntimeError(
                "Snowpark Connect cannot be run inside of a Spark environment"
            )
        if connection_parameters is not None:
            if snowpark_session is not None:
                raise ValueError(
                    "Only specify one of snowpark_session and connection_parameters"
                )
            session_builder = snowpark.Session.builder.configs(connection_parameters)
            snowpark_session = session_builder.create()

        if _server_running.is_set():
            url = get_client_url()
            logger.warning(f"Snowpark Connect session is already running at {url}")
            return None

        explicit_endpoints = sum(
            1 for option in (remote_url, tcp_port, unix_domain_socket) if option
        )
        if explicit_endpoints > 1:
            raise RuntimeError(
                "Can only set at most one of remote_url, tcp_port, and unix_domain_socket"
            )

        url_from_env = os.environ.get("SPARK_REMOTE", None)
        if remote_url:
            _set_remote_url(remote_url)
        elif tcp_port:
            _set_server_tcp_port(tcp_port)
        elif unix_domain_socket:
            _set_server_unix_domain_socket(unix_domain_socket)
        elif url_from_env:
            # Spark clients read the Spark Connect URL from SPARK_REMOTE. With no
            # explicit endpoint argument, mimic the clients and do the same.
            _set_remote_url(url_from_env)
        elif os.name == "nt":
            # Windows does not support unix domain sockets, so use default TCP port instead.
            _set_server_tcp_port(DEFAULT_PORT)
        else:
            # Last fallback: a random, unique Unix Domain Socket. Clients reach it
            # through snowpark_connect.get_session(). Mostly used in stored procs
            # and Notebooks to avoid port conflicts.
            unix_domain_socket = _make_unix_domain_socket()
            _set_server_unix_domain_socket(unix_domain_socket)

        start_jvm()
        _disable_protobuf_recursion_limit()

        if not is_daemon:
            # Launch in the foreground.
            _serve(session=snowpark_session)
            return None

        # `daemon=True` ensures the server thread exits when script finishes.
        server_thread = threading.Thread(
            target=_serve, args=(stop_event, snowpark_session), daemon=True
        )
        server_thread.start()
        # _serve() sets the event on both successful start and failure.
        _server_running.wait()
        if _server_error:
            raise RuntimeError("Snowpark Connect session failed to start")
        return server_thread
    except Exception as e:
        _reset_server_run_state()
        logger.error(e, exc_info=True)
        raise e
|
|
1045
|
+
|
|
1046
|
+
|
|
1047
|
+
def get_session(url: Optional[str] = None, conf: SparkConf = None) -> SparkSession:
    """
    Returns spark connect session

    Parameters:
        url (Optional[str]): Spark connect server URL. Uses the client URL of the
            started server if none is provided.
        conf (SparkConf): Optional configuration; every key/value pair is applied
            to the session builder.

    Returns:
        The session produced by the builder's getOrCreate()

    Raises:
        RuntimeError: If no URL is given and no client URL has been set.
    """
    try:
        target_url = url or get_client_url()

        builder = (
            SparkSession.builder.channelBuilder(UnixDomainSocketChannelBuilder())
            if target_url.startswith("unix:/")
            else SparkSession.builder.remote(target_url)
        )

        if conf is not None:
            for key, value in conf.getAll():
                builder.config(key, value)

        return builder.getOrCreate()
    except Exception as e:
        _reset_server_run_state()
        logger.error(e, exc_info=True)
        raise e
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
def init_spark_session(conf: SparkConf = None) -> SparkSession:
    """Set up the Java environment variables, start the server on the active
    Snowpark session and return a Spark Connect session for it.

    ``JAVA_HOME`` comes from ``jdk4py`` when that module is installed;
    otherwise it and ``JAVA_LD_LIBRARY_PATH`` are derived from ``CONDA_PREFIX``.
    """
    try:
        # For Notebooks on SPCS
        from jdk4py import JAVA_HOME
    except ModuleNotFoundError:
        # For notebooks on Warehouse
        conda_prefix = os.environ["CONDA_PREFIX"]
        os.environ["JAVA_HOME"] = conda_prefix
        os.environ["JAVA_LD_LIBRARY_PATH"] = os.path.join(
            os.environ["CONDA_PREFIX"], "lib", "server"
        )
    else:
        os.environ["JAVA_HOME"] = str(JAVA_HOME)
    logger.info("JAVA_HOME=%s", os.environ["JAVA_HOME"])

    os.environ.update(
        {
            "SPARK_LOCAL_HOSTNAME": "127.0.0.1",
            "SPARK_CONNECT_MODE_ENABLED": "1",
        }
    )

    active_session = snowpark.context.get_active_session()
    start_session(snowpark_session=active_session)
    return get_session(conf=conf)
|
|
1100
|
+
|
|
1101
|
+
|
|
1102
|
+
def enable_debug_logging():
    """Set the module logger and each of its handlers to DEBUG level."""
    for target in (logger, *logger.handlers):
        target.setLevel(logging.DEBUG)
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
def _get_files_metadata(data_source: relations_proto.Read.DataSource) -> List[str]:
    """List the local files behind a data source as ``file://`` URLs.

    A path that is a regular file is returned as is. Any other path is listed
    as a directory, keeping the entries whose name ends with the data source
    format (``txt`` for the ``text`` format).
    """
    # TODO: Handle paths on the cloud
    suffix = "txt" if data_source.format == "text" else data_source.format
    collected = []
    for path in data_source.paths:
        if os.path.isfile(path):
            collected.append(f"file://{path}")
            continue
        for entry in os.listdir(path):
            if entry.endswith(suffix):
                collected.append(f"file://{path}/{entry}")
    return collected
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
def _disable_protobuf_recursion_limit():
    """Allow oversize protos in the protobuf C++ message implementation.

    Spark workloads often produce deeply nested execution plans (queries with
    many unions, complex expressions with several nesting levels) that exceed
    the default protobuf recursion limit of 100. Without this, legitimate
    queries fail with
    `(DecodeError) Error parsing message with type 'spark.connect.Relation'`.
    See test_sql_resulting_in_nested_protobuf and
    https://github.com/protocolbuffers/protobuf/blob/960e79087b332583c80537c949621108a85aa442/src/google/protobuf/io/coded_stream.h#L616
    """
    from google.protobuf.pyext.cpp_message import _message as cpp_backend

    cpp_backend.SetAllowOversizeProtos(True)
|