snowpark-connect 0.20.2 (snowpark_connect-0.20.2-py3-none-any.whl)
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +23 -0
- snowflake/snowpark_connect/analyze_plan/__init__.py +3 -0
- snowflake/snowpark_connect/analyze_plan/map_tree_string.py +38 -0
- snowflake/snowpark_connect/column_name_handler.py +735 -0
- snowflake/snowpark_connect/config.py +576 -0
- snowflake/snowpark_connect/constants.py +47 -0
- snowflake/snowpark_connect/control_server.py +52 -0
- snowflake/snowpark_connect/dataframe_name_handler.py +54 -0
- snowflake/snowpark_connect/date_time_format_mapping.py +399 -0
- snowflake/snowpark_connect/empty_dataframe.py +18 -0
- snowflake/snowpark_connect/error/__init__.py +11 -0
- snowflake/snowpark_connect/error/error_mapping.py +6174 -0
- snowflake/snowpark_connect/error/error_utils.py +321 -0
- snowflake/snowpark_connect/error/exceptions.py +24 -0
- snowflake/snowpark_connect/execute_plan/__init__.py +3 -0
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +204 -0
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +173 -0
- snowflake/snowpark_connect/execute_plan/utils.py +183 -0
- snowflake/snowpark_connect/expression/__init__.py +3 -0
- snowflake/snowpark_connect/expression/literal.py +90 -0
- snowflake/snowpark_connect/expression/map_cast.py +343 -0
- snowflake/snowpark_connect/expression/map_expression.py +293 -0
- snowflake/snowpark_connect/expression/map_extension.py +104 -0
- snowflake/snowpark_connect/expression/map_sql_expression.py +633 -0
- snowflake/snowpark_connect/expression/map_udf.py +142 -0
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +241 -0
- snowflake/snowpark_connect/expression/map_unresolved_extract_value.py +85 -0
- snowflake/snowpark_connect/expression/map_unresolved_function.py +9450 -0
- snowflake/snowpark_connect/expression/map_unresolved_star.py +218 -0
- snowflake/snowpark_connect/expression/map_update_fields.py +164 -0
- snowflake/snowpark_connect/expression/map_window_function.py +258 -0
- snowflake/snowpark_connect/expression/typer.py +125 -0
- snowflake/snowpark_connect/includes/__init__.py +0 -0
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-mapper-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-compiler-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kubernetes_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-streaming_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/python/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/__init__.py +173 -0
- snowflake/snowpark_connect/includes/python/pyspark/_globals.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/_typing.pyi +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/accumulators.py +341 -0
- snowflake/snowpark_connect/includes/python/pyspark/broadcast.py +383 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/__init__.py +8 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle.py +948 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/cloudpickle_fast.py +844 -0
- snowflake/snowpark_connect/includes/python/pyspark/cloudpickle/compat.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/conf.py +276 -0
- snowflake/snowpark_connect/includes/python/pyspark/context.py +2601 -0
- snowflake/snowpark_connect/includes/python/pyspark/daemon.py +218 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/__init__.py +70 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/error_classes.py +889 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/base.py +228 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/captured.py +307 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/exceptions/connect.py +190 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/tests/test_errors.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/errors/utils.py +116 -0
- snowflake/snowpark_connect/includes/python/pyspark/files.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/find_spark_home.py +95 -0
- snowflake/snowpark_connect/includes/python/pyspark/install.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/instrumentation_utils.py +190 -0
- snowflake/snowpark_connect/includes/python/pyspark/java_gateway.py +248 -0
- snowflake/snowpark_connect/includes/python/pyspark/join.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/__init__.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/_typing.pyi +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/base.py +414 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/classification.py +4332 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/clustering.py +2188 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/common.py +146 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/__init__.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/base.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/classification.py +382 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/evaluation.py +291 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/feature.py +258 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/functions.py +77 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/io_utils.py +335 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/pipeline.py +262 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/summarizer.py +120 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/tuning.py +579 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/connect/util.py +173 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/deepspeed_distributor.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/deepspeed/tests/test_deepspeed_distributor.py +306 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/dl_util.py +150 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/evaluation.py +1166 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/feature.py +7474 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/fpm.py +543 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/functions.py +842 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/image.py +271 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/linalg/__init__.py +1382 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/model_cache.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/__init__.py +602 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/_shared_params_code_gen.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/param/shared.py +878 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/pipeline.py +451 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/recommendation.py +748 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/regression.py +3335 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/stat.py +523 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_classification.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_evaluation.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_feature.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_function.py +114 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_pipeline.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_summarizer.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_connect_tuning.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_classification.py +238 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_evaluation.py +194 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_feature.py +156 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_pipeline.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_summarizer.py +78 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py +292 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_data_loader.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/connect/test_parity_torch_distributor.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_algorithms.py +456 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_base.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_dl_util.py +186 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_evaluation.py +77 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_feature.py +401 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_functions.py +528 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_image.py +82 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_linalg.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_model_cache.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_param.py +441 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_persistence.py +546 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_pipeline.py +71 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_stat.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_training_summary.py +494 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_util.py +85 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/test_wrapper.py +138 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_basic.py +151 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_nested.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_cv_io_pipeline.py +143 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tuning.py +551 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_basic.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_nested.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tests/tuning/test_tvs_io_pipeline.py +142 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/data.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/distributor.py +1133 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/log_communication.py +198 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_data_loader.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_distributor.py +561 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/tests/test_log_communication.py +172 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/torch/torch_run_process_wrapper.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tree.py +434 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/tuning.py +1741 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/util.py +749 -0
- snowflake/snowpark_connect/includes/python/pyspark/ml/wrapper.py +465 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/__init__.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/_typing.pyi +33 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/classification.py +989 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/clustering.py +1318 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/common.py +174 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/evaluation.py +691 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/feature.py +1085 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/fpm.py +233 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/__init__.py +1653 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/linalg/distributed.py +1662 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/random.py +698 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/recommendation.py +389 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/regression.py +1067 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/KernelDensity.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/__init__.py +34 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/_statistics.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/distribution.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/stat/test.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_algorithms.py +353 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_feature.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_linalg.py +680 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_stat.py +206 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_streaming_algorithms.py +471 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tests/test_util.py +108 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/tree.py +888 -0
- snowflake/snowpark_connect/includes/python/pyspark/mllib/util.py +659 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/__init__.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/_typing.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/accessors.py +989 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/base.py +1804 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/categorical.py +822 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/config.py +539 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/correlation.py +262 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/base.py +519 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/binary_ops.py +98 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/boolean_ops.py +426 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/categorical_ops.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/complex_ops.py +145 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/date_ops.py +127 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/datetime_ops.py +171 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/null_ops.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/num_ops.py +588 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/string_ops.py +154 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/timedelta_ops.py +101 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/data_type_ops/udt_ops.py +29 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/datetimes.py +891 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/exceptions.py +150 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/extensions.py +388 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/frame.py +13738 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/generic.py +3560 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/groupby.py +4448 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/base.py +2783 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/category.py +773 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/datetimes.py +843 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/multi.py +1323 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/numeric.py +210 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexes/timedelta.py +197 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/indexing.py +1862 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/internal.py +1680 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/__init__.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/common.py +76 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/frame.py +63 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/general_functions.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/groupby.py +93 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/indexes.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/resample.py +101 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/scalars.py +29 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/series.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/missing/window.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/mlflow.py +238 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/namespace.py +3807 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/numpy_compat.py +260 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/__init__.py +17 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/core.py +1213 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/matplotlib.py +928 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/plot/plotly.py +261 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/resample.py +816 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/series.py +7440 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_formatter.py +308 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/sql_processor.py +394 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/strings.py +2371 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/supported_api_gen.py +378 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_any_all.py +177 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_apply_func.py +575 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_binary_ops.py +235 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_combine.py +653 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_compute.py +463 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_corrwith.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cov.py +151 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_cumulative.py +139 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_describe.py +458 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_eval.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_melt.py +202 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_missing_data.py +520 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/computation/test_pivot.py +361 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_any_all.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_apply_func.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_binary_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_combine.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_compute.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_corrwith.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cov.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_cumulative.py +90 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_describe.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_eval.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_melt.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_missing_data.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/computation/test_parity_pivot.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_base.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_binary_ops.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_boolean_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_categorical_ops.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_complex_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_date_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_datetime_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_arithmetic.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_num_reverse.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_string_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_timedelta_ops.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_udt_ops.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +226 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_align.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_basic_slow.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_cov_corrwith.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_frame.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_dot_series.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_index.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_series.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_frame.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/diff_frames_ops/test_parity_setitem_series.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_attrs.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_constructor.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_conversion.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reindexing.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_reshaping.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_spark.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_take.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_time_series.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/frame/test_parity_truncate.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_aggregate.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_apply_func.py +41 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_cumulative.py +67 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_describe.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_groupby.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_head_tail.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_index.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_missing_data.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_split_apply.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/groupby/test_parity_stat.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_align.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_base.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_category.py +73 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_datetime.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reindex.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_rename.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_reset_index.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/indexes/test_parity_timedelta.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/io/test_parity_io.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_matplotlib.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_frame_plot_plotly.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_plotly.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_all_any.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_arg_ops.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_of.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_as_type.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_compute.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_conversion.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_cumulative.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_index.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_missing_data.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_series.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_sort.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/series/test_parity_stat.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_categorical.py +66 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_config.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_csv.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_conversion.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_dataframe_spark_io.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_default_index.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ewm.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_expanding.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_extension.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_generic_functions.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexing.py +49 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_indexops_spark.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_internal.py +41 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_namespace.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +60 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_expanding.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_ops_on_diff_frames_groupby_rolling.py +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_repr.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_resample.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_reshape.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_rolling.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_scalars.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_conversion.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_datetime.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_series_string.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_spark_functions.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_sql.py +43 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_stats.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_typedef.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_utils.py +37 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/connect/test_parity_window.py +39 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_base.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py +224 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py +825 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py +562 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py +260 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py +178 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_arithmetic.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +497 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_num_reverse.py +140 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py +354 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py +219 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +228 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_align.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_basic_slow.py +198 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_cov_corrwith.py +181 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_frame.py +103 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_dot_series.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_index.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_series.py +136 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_frame.py +125 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/diff_frames_ops/test_setitem_series.py +217 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_attrs.py +384 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_constructor.py +598 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_conversion.py +73 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reindexing.py +869 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_reshaping.py +487 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_spark.py +309 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_take.py +156 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_time_series.py +149 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/frame/test_truncate.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_aggregate.py +311 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_apply_func.py +524 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_cumulative.py +419 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_describe.py +144 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_groupby.py +979 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_head_tail.py +234 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_index.py +206 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_missing_data.py +421 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_split_apply.py +187 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/groupby/test_stat.py +397 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_align.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_base.py +2743 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_category.py +484 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_datetime.py +276 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_indexing.py +432 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reindex.py +310 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_rename.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_reset_index.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/indexes/test_timedelta.py +128 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/io/test_io.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot.py +170 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_matplotlib.py +547 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py +285 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot.py +106 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_matplotlib.py +409 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py +247 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_all_any.py +105 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_arg_ops.py +197 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_of.py +137 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_as_type.py +227 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_compute.py +634 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_conversion.py +88 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_cumulative.py +139 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_index.py +475 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_missing_data.py +265 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_series.py +818 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_sort.py +162 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/series/test_stat.py +780 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_categorical.py +741 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_config.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_csv.py +453 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_conversion.py +281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_dataframe_spark_io.py +487 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_default_index.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ewm.py +434 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_expanding.py +253 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_extension.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_frame_spark.py +162 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_generic_functions.py +234 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexing.py +1339 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_indexops_spark.py +82 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_internal.py +124 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_namespace.py +638 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_numpy_compat.py +200 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames.py +1355 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py +655 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py +113 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_repr.py +192 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_resample.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_reshape.py +495 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_rolling.py +263 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_scalars.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_conversion.py +85 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_datetime.py +364 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_series_string.py +362 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_spark_functions.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_sql.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_stats.py +581 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_typedef.py +447 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_utils.py +301 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/tests/test_window.py +465 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/typedef/typehints.py +874 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/__init__.py +143 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/usage_logging/usage_logger.py +132 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/utils.py +1063 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/window.py +2702 -0
- snowflake/snowpark_connect/includes/python/pyspark/profiler.py +489 -0
- snowflake/snowpark_connect/includes/python/pyspark/py.typed +1 -0
- snowflake/snowpark_connect/includes/python/pyspark/python/pyspark/shell.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/rdd.py +5518 -0
- snowflake/snowpark_connect/includes/python/pyspark/rddsampler.py +115 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/__init__.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/information.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/profile.py +317 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/requests.py +539 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/resource/tests/test_resources.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/resultiterable.py +45 -0
- snowflake/snowpark_connect/includes/python/pyspark/serializers.py +681 -0
- snowflake/snowpark_connect/includes/python/pyspark/shell.py +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/shuffle.py +854 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/__init__.py +75 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/_typing.pyi +80 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/avro/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/avro/functions.py +188 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/catalog.py +1270 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/column.py +1431 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/conf.py +99 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/_typing.py +90 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/avro/functions.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/catalog.py +356 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/artifact.py +412 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/core.py +1689 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/client/reattach.py +340 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/column.py +514 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conf.py +128 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/conversion.py +490 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/dataframe.py +2172 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/expressions.py +1056 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py +3937 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/group.py +418 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/plan.py +2289 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/__init__.py +25 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2.pyi +2718 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/base_pb2_grpc.py +423 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.py +109 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/catalog_pb2.pyi +1130 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.py +141 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/commands_pb2.pyi +1766 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.py +47 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/common_pb2.pyi +123 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/example_plugins_pb2.pyi +112 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.py +107 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/expressions_pb2.pyi +1507 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.py +195 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/relations_pb2.pyi +3613 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.py +95 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/proto/types_pb2.pyi +980 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/protobuf/functions.py +166 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/readwriter.py +861 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/session.py +952 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/query.py +295 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/readwriter.py +618 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +87 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/types.py +301 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udf.py +296 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/udtf.py +200 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/utils.py +58 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/connect/window.py +266 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/context.py +818 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/dataframe.py +5973 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/functions.py +15889 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/group.py +547 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/observation.py +152 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/__init__.pyi +344 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi +17 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +20 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/_typing/protocols/series.pyi +20 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/conversion.py +671 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.py +480 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/functions.pyi +132 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/group_ops.py +523 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/map_ops.py +216 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/serializers.py +1019 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/typehints.py +172 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/types.py +972 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/pandas/utils.py +86 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/__init__.py +18 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/protobuf/functions.py +334 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/readwriter.py +2159 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/session.py +2088 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/sql_formatter.py +84 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/listener.py +1050 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/query.py +746 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/readwriter.py +1652 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/streaming/state.py +288 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_artifact.py +420 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/client/test_client.py +358 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +116 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +35 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_basic.py +3612 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_column.py +1042 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_function.py +2381 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_connect_plan.py +1060 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +38 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +48 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_catalog.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_column.py +55 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_conf.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_dataframe.py +96 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_datasources.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_errors.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_functions.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_group.py +36 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +59 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +74 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +62 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +58 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +70 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +50 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +68 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +40 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_readwriter.py +46 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_serde.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_types.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udf.py +100 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_parity_udtf.py +163 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_session.py +181 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/connect/test_utils.py +42 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +623 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +869 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +342 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_map.py +436 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf.py +363 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +592 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +1503 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +392 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +375 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/pandas/test_pandas_udf_window.py +411 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming.py +401 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach.py +295 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +106 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/streaming/test_streaming_listener.py +558 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow.py +1346 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_map.py +182 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_arrow_python_udf.py +202 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_catalog.py +503 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_column.py +225 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_conf.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_context.py +201 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_dataframe.py +1931 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_datasources.py +256 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_errors.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_functions.py +1349 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_group.py +53 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +68 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_readwriter.py +283 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_serde.py +155 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_session.py +412 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_types.py +1581 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf.py +961 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udf_profiler.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_udtf.py +1456 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/tests/test_utils.py +1686 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/types.py +2558 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/udf.py +714 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/udtf.py +325 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/utils.py +339 -0
- snowflake/snowpark_connect/includes/python/pyspark/sql/window.py +492 -0
- snowflake/snowpark_connect/includes/python/pyspark/statcounter.py +165 -0
- snowflake/snowpark_connect/includes/python/pyspark/status.py +112 -0
- snowflake/snowpark_connect/includes/python/pyspark/storagelevel.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/__init__.py +22 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/context.py +471 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/dstream.py +933 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/kinesis.py +205 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/listener.py +83 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_context.py +184 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_dstream.py +706 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_kinesis.py +118 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/tests/test_listener.py +160 -0
- snowflake/snowpark_connect/includes/python/pyspark/streaming/util.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/taskcontext.py +502 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/__init__.py +21 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/connectutils.py +199 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/mllibutils.py +30 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/mlutils.py +275 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/objects.py +121 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/pandasutils.py +714 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/sqlutils.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/streamingutils.py +178 -0
- snowflake/snowpark_connect/includes/python/pyspark/testing/utils.py +636 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_appsubmit.py +306 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_broadcast.py +196 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_conf.py +44 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_context.py +346 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_daemon.py +89 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_install_spark.py +124 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_join.py +69 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_memory_profiler.py +167 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_pin_thread.py +194 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_profiler.py +168 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rdd.py +939 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddbarrier.py +52 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_rddsampler.py +66 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_readwrite.py +368 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_serializers.py +257 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_shuffle.py +267 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_stage_sched.py +153 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_statcounter.py +130 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_taskcontext.py +350 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_util.py +97 -0
- snowflake/snowpark_connect/includes/python/pyspark/tests/test_worker.py +271 -0
- snowflake/snowpark_connect/includes/python/pyspark/traceback_utils.py +81 -0
- snowflake/snowpark_connect/includes/python/pyspark/util.py +416 -0
- snowflake/snowpark_connect/includes/python/pyspark/version.py +19 -0
- snowflake/snowpark_connect/includes/python/pyspark/worker.py +1307 -0
- snowflake/snowpark_connect/includes/python/pyspark/worker_util.py +46 -0
- snowflake/snowpark_connect/proto/__init__.py +10 -0
- snowflake/snowpark_connect/proto/control_pb2.py +35 -0
- snowflake/snowpark_connect/proto/control_pb2.pyi +38 -0
- snowflake/snowpark_connect/proto/control_pb2_grpc.py +183 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +35 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +53 -0
- snowflake/snowpark_connect/proto/snowflake_rdd_pb2.pyi +39 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +47 -0
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +111 -0
- snowflake/snowpark_connect/relation/__init__.py +3 -0
- snowflake/snowpark_connect/relation/catalogs/__init__.py +12 -0
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +287 -0
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +467 -0
- snowflake/snowpark_connect/relation/catalogs/utils.py +51 -0
- snowflake/snowpark_connect/relation/io_utils.py +76 -0
- snowflake/snowpark_connect/relation/map_aggregate.py +322 -0
- snowflake/snowpark_connect/relation/map_catalog.py +151 -0
- snowflake/snowpark_connect/relation/map_column_ops.py +1068 -0
- snowflake/snowpark_connect/relation/map_crosstab.py +48 -0
- snowflake/snowpark_connect/relation/map_extension.py +412 -0
- snowflake/snowpark_connect/relation/map_join.py +341 -0
- snowflake/snowpark_connect/relation/map_local_relation.py +326 -0
- snowflake/snowpark_connect/relation/map_map_partitions.py +146 -0
- snowflake/snowpark_connect/relation/map_relation.py +253 -0
- snowflake/snowpark_connect/relation/map_row_ops.py +716 -0
- snowflake/snowpark_connect/relation/map_sample_by.py +35 -0
- snowflake/snowpark_connect/relation/map_show_string.py +50 -0
- snowflake/snowpark_connect/relation/map_sql.py +1874 -0
- snowflake/snowpark_connect/relation/map_stats.py +324 -0
- snowflake/snowpark_connect/relation/map_subquery_alias.py +32 -0
- snowflake/snowpark_connect/relation/map_udtf.py +288 -0
- snowflake/snowpark_connect/relation/read/__init__.py +7 -0
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +668 -0
- snowflake/snowpark_connect/relation/read/map_read.py +367 -0
- snowflake/snowpark_connect/relation/read/map_read_csv.py +142 -0
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +108 -0
- snowflake/snowpark_connect/relation/read/map_read_json.py +344 -0
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +194 -0
- snowflake/snowpark_connect/relation/read/map_read_socket.py +59 -0
- snowflake/snowpark_connect/relation/read/map_read_table.py +109 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +106 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +399 -0
- snowflake/snowpark_connect/relation/read/utils.py +155 -0
- snowflake/snowpark_connect/relation/stage_locator.py +161 -0
- snowflake/snowpark_connect/relation/utils.py +219 -0
- snowflake/snowpark_connect/relation/write/__init__.py +3 -0
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +339 -0
- snowflake/snowpark_connect/relation/write/map_write.py +436 -0
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +48 -0
- snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
- snowflake/snowpark_connect/resources_initializer.py +75 -0
- snowflake/snowpark_connect/server.py +1136 -0
- snowflake/snowpark_connect/start_server.py +32 -0
- snowflake/snowpark_connect/tcm.py +8 -0
- snowflake/snowpark_connect/type_mapping.py +1003 -0
- snowflake/snowpark_connect/typed_column.py +94 -0
- snowflake/snowpark_connect/utils/__init__.py +3 -0
- snowflake/snowpark_connect/utils/artifacts.py +48 -0
- snowflake/snowpark_connect/utils/attribute_handling.py +72 -0
- snowflake/snowpark_connect/utils/cache.py +84 -0
- snowflake/snowpark_connect/utils/concurrent.py +124 -0
- snowflake/snowpark_connect/utils/context.py +390 -0
- snowflake/snowpark_connect/utils/describe_query_cache.py +231 -0
- snowflake/snowpark_connect/utils/interrupt.py +85 -0
- snowflake/snowpark_connect/utils/io_utils.py +35 -0
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +117 -0
- snowflake/snowpark_connect/utils/profiling.py +47 -0
- snowflake/snowpark_connect/utils/session.py +180 -0
- snowflake/snowpark_connect/utils/snowpark_connect_logging.py +38 -0
- snowflake/snowpark_connect/utils/telemetry.py +513 -0
- snowflake/snowpark_connect/utils/udf_cache.py +392 -0
- snowflake/snowpark_connect/utils/udf_helper.py +328 -0
- snowflake/snowpark_connect/utils/udf_utils.py +310 -0
- snowflake/snowpark_connect/utils/udtf_helper.py +420 -0
- snowflake/snowpark_connect/utils/udtf_utils.py +799 -0
- snowflake/snowpark_connect/utils/xxhash64.py +247 -0
- snowflake/snowpark_connect/version.py +6 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-connect +71 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-session +11 -0
- snowpark_connect-0.20.2.data/scripts/snowpark-submit +354 -0
- snowpark_connect-0.20.2.dist-info/METADATA +37 -0
- snowpark_connect-0.20.2.dist-info/RECORD +879 -0
- snowpark_connect-0.20.2.dist-info/WHEEL +5 -0
- snowpark_connect-0.20.2.dist-info/licenses/LICENSE.txt +202 -0
- snowpark_connect-0.20.2.dist-info/top_level.txt +1 -0

snowflake/snowpark_connect/includes/python/pyspark/sql/connect/functions.py
@@ -0,0 +1,3937 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__)

import decimal
import inspect
import warnings
import functools
from typing import (
    Any,
    Dict,
    TYPE_CHECKING,
    Union,
    List,
    overload,
    Optional,
    Tuple,
    Type,
    Callable,
    ValuesView,
    cast,
)

import numpy as np

from pyspark.errors import PySparkTypeError, PySparkValueError
from pyspark.sql.connect.column import Column
from pyspark.sql.connect.expressions import (
    CaseWhen,
    Expression,
    LiteralExpression,
    ColumnReference,
    UnresolvedFunction,
    UnresolvedStar,
    SQLExpression,
    LambdaFunction,
    UnresolvedNamedLambdaVariable,
    CallFunction,
)
from pyspark.sql.connect.udf import _create_py_udf
from pyspark.sql.connect.udtf import _create_py_udtf
from pyspark.sql import functions as pysparkfuncs
from pyspark.sql.types import _from_numpy_type, DataType, StructType, ArrayType, StringType

# The implementation of pandas_udf is embedded in pyspark.sql.function.pandas_udf
# for code reuse.
from pyspark.sql.functions import pandas_udf  # noqa: F401


if TYPE_CHECKING:
    from pyspark.sql.connect._typing import (
        ColumnOrName,
        DataTypeOrString,
        UserDefinedFunctionLike,
    )
    from pyspark.sql.connect.dataframe import DataFrame
    from pyspark.sql.connect.udtf import UserDefinedTableFunction

def _to_col_with_plan_id(col: str, plan_id: Optional[int]) -> Column:
    if col == "*":
        return Column(UnresolvedStar(unparsed_target=None))
    elif col.endswith(".*"):
        return Column(UnresolvedStar(unparsed_target=col))
    else:
        return Column(ColumnReference(unparsed_identifier=col, plan_id=plan_id))


def _to_col(col: "ColumnOrName") -> Column:
    assert isinstance(col, (Column, str))
    return col if isinstance(col, Column) else column(col)

def _invoke_function(name: str, *args: Union[Column, Expression]) -> Column:
    """
    Simple wrapper function that converts the arguments into the appropriate types.
    Parameters
    ----------
    name Name of the function to be called.
    args The list of arguments.

    Returns
    -------
    :class:`Column`
    """
    expressions: List[Expression] = []
    for arg in args:
        assert isinstance(arg, (Column, Expression))
        if isinstance(arg, Column):
            expressions.append(arg._expr)
        else:
            expressions.append(arg)
    return Column(UnresolvedFunction(name, expressions))


def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column:
    """
    Invokes n-ary function identified by name
    and wraps the result with :class:`~pyspark.sql.Column`.
    """
    _cols = [_to_col(c) for c in cols]
    return _invoke_function(name, *_cols)

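# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# Every wrapper below bottoms out in _invoke_function, which only builds an
# unresolved expression tree; nothing is evaluated client-side. Assuming a
# hypothetical Spark Connect session `spark`:
#
#     df = spark.range(4)
#     df.select(sqrt(col("id")))
#     # sqrt(col("id")) is Column(UnresolvedFunction("sqrt",
#     #     [ColumnReference(unparsed_identifier="id")])); the server
#     #     resolves and evaluates it when the plan is executed.
# ---------------------------------------------------------------------------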
def _invoke_binary_math_function(name: str, col1: Any, col2: Any) -> Column:
    """
    Invokes binary math function identified by name
    and wraps the result with :class:`~pyspark.sql.Column`.
    """

    # For legacy reasons, the arguments here can be implicitly converted into column
    _cols = [_to_col(c) if isinstance(c, (str, Column)) else lit(c) for c in (col1, col2)]
    return _invoke_function(name, *_cols)


def _get_lambda_parameters(f: Callable) -> ValuesView[inspect.Parameter]:
    signature = inspect.signature(f)
    parameters = signature.parameters.values()

    # We should exclude functions that use, variable args and keyword argument
    # names, as well as keyword only args.
    supported_parameter_types = {
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.POSITIONAL_ONLY,
    }

    # Validate that the function arity is between 1 and 3.
    if not (1 <= len(parameters) <= 3):
        raise PySparkValueError(
            error_class="WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION",
            message_parameters={"func_name": f.__name__, "num_args": str(len(parameters))},
        )

    # Verify that all arguments can be used as positional arguments.
    if not all(p.kind in supported_parameter_types for p in parameters):
        raise PySparkValueError(
            error_class="UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION",
            message_parameters={"func_name": f.__name__},
        )

    return parameters


def _create_lambda(f: Callable) -> LambdaFunction:
    """
    Create `o.a.s.sql.expressions.LambdaFunction` corresponding
    to transformation described by f

    :param f: A Python of one of the following forms:
            - (Column) -> Column: ...
            - (Column, Column) -> Column: ...
            - (Column, Column, Column) -> Column: ...
    """
    parameters = _get_lambda_parameters(f)

    arg_names = ["x", "y", "z"][: len(parameters)]
    arg_exprs = [
        UnresolvedNamedLambdaVariable([UnresolvedNamedLambdaVariable.fresh_var_name(arg_name)])
        for arg_name in arg_names
    ]
    arg_cols = [Column(arg_expr) for arg_expr in arg_exprs]

    result = f(*arg_cols)

    if not isinstance(result, Column):
        raise PySparkValueError(
            error_class="HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN",
            message_parameters={"func_name": f.__name__, "return_type": type(result).__name__},
        )

    return LambdaFunction(result._expr, arg_exprs)


def _invoke_higher_order_function(
    name: str,
    cols: List["ColumnOrName"],
    funs: List[Callable],
) -> Column:
    """
    Invokes expression identified by name,
    (relative to ```org.apache.spark.sql.catalyst.expressions``)
    and wraps the result with Column (first Scala one, then Python).

    :param name: Name of the expression
    :param cols: a list of columns
    :param funs: a list of (*Column) -> Column functions.

    :return: a Column
    """
    _cols = [_to_col(c) for c in cols]
    _funs = [_create_lambda(f) for f in funs]

    return _invoke_function(name, *_cols, *_funs)

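# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# _create_lambda calls the user's Python function once, at plan-construction
# time, with placeholder lambda variables. For example:
#
#     _create_lambda(lambda x: x + lit(1))
#     # 1. binds x to a fresh UnresolvedNamedLambdaVariable (e.g. "x_0")
#     # 2. evaluates the lambda, yielding UnresolvedFunction("+", [x_0, 1])
#     # 3. returns LambdaFunction(that expression, [x_0])
#
# Array/map higher-order functions defined further down in this module pass
# such lambdas through _invoke_higher_order_function.
# ---------------------------------------------------------------------------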
def _options_to_col(options: Dict[str, Any]) -> Column:
    _options: List[Column] = []
    for k, v in options.items():
        _options.append(lit(str(k)))
        _options.append(lit(str(v)))
    return create_map(*_options)

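# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# _options_to_col flattens an options dict into alternating key/value
# literals for create_map (defined later in this module):
#
#     _options_to_col({"mode": "PERMISSIVE", "multiLine": True})
#     # -> create_map(lit("mode"), lit("PERMISSIVE"),
#     #               lit("multiLine"), lit("True"))
#
# Note that both keys and values are stringified with str() first.
# ---------------------------------------------------------------------------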
# Normal Functions


def col(col: str) -> Column:
    return _to_col_with_plan_id(col=col, plan_id=None)


col.__doc__ = pysparkfuncs.col.__doc__


column = col

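# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# col() defers all name resolution to the server; see _to_col_with_plan_id:
#
#     col("age")    # ColumnReference(unparsed_identifier="age")
#     col("*")      # UnresolvedStar(unparsed_target=None)
#     col("t.*")    # UnresolvedStar(unparsed_target="t.*")
# ---------------------------------------------------------------------------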
def lit(col: Any) -> Column:
    if isinstance(col, Column):
        return col
    elif isinstance(col, list):
        if any(isinstance(c, Column) for c in col):
            raise PySparkValueError(
                error_class="COLUMN_IN_LIST", message_parameters={"func_name": "lit"}
            )
        return array(*[lit(c) for c in col])
    elif isinstance(col, np.ndarray) and col.ndim == 1:
        if _from_numpy_type(col.dtype) is None:
            raise PySparkTypeError(
                error_class="UNSUPPORTED_NUMPY_ARRAY_SCALAR",
                message_parameters={"dtype": col.dtype.name},
            )

        # NumpyArrayConverter for Py4J can not support ndarray with int8 values.
        # Actually this is not a problem for Connect, but here still convert it
        # to int16 for compatibility.
        if col.dtype == np.int8:
            col = col.astype(np.int16)

        return array(*[lit(c) for c in col])
    else:
        return Column(LiteralExpression._from_value(col))


lit.__doc__ = pysparkfuncs.lit.__doc__

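# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# lit() accepts scalars, flat lists, and 1-D numpy arrays:
#
#     lit(5)                        # LiteralExpression for the int 5
#     lit([1, 2, 3])                # array(lit(1), lit(2), lit(3))
#     lit(np.array([1, 2], dtype=np.int8))
#     # int8 arrays are widened to int16 before conversion (see comment
#     # above); mixing Column objects into a list raises COLUMN_IN_LIST.
# ---------------------------------------------------------------------------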
def bitwiseNOT(col: "ColumnOrName") -> Column:
    warnings.warn("Deprecated in 3.4, use bitwise_not instead.", FutureWarning)
    return bitwise_not(col)


bitwiseNOT.__doc__ = pysparkfuncs.bitwiseNOT.__doc__


def bitwise_not(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("~", col)


bitwise_not.__doc__ = pysparkfuncs.bitwise_not.__doc__


def bit_count(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bit_count", col)


bit_count.__doc__ = pysparkfuncs.bit_count.__doc__


def bit_get(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bit_get", col, pos)


bit_get.__doc__ = pysparkfuncs.bit_get.__doc__


def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("getbit", col, pos)


getbit.__doc__ = pysparkfuncs.getbit.__doc__


def broadcast(df: "DataFrame") -> "DataFrame":
    from pyspark.sql.connect.dataframe import DataFrame

    if not isinstance(df, DataFrame):
        raise PySparkTypeError(
            error_class="NOT_DATAFRAME",
            message_parameters={"arg_name": "df", "arg_type": type(df).__name__},
        )
    return df.hint("broadcast")


broadcast.__doc__ = pysparkfuncs.broadcast.__doc__


def coalesce(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("coalesce", *cols)


coalesce.__doc__ = pysparkfuncs.coalesce.__doc__


def expr(str: str) -> Column:
    return Column(SQLExpression(str))


expr.__doc__ = pysparkfuncs.expr.__doc__

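# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# expr() wraps the raw SQL text unparsed; the server parses it when the
# plan is analyzed:
#
#     df.select(expr("length(name)"))   # SQLExpression("length(name)")
#
# `df` is an assumed DataFrame with a string column "name".
# ---------------------------------------------------------------------------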
def greatest(*cols: "ColumnOrName") -> Column:
    if len(cols) < 2:
        raise PySparkValueError(
            error_class="WRONG_NUM_COLUMNS",
            message_parameters={"func_name": "greatest", "num_cols": "2"},
        )
    return _invoke_function_over_columns("greatest", *cols)


greatest.__doc__ = pysparkfuncs.greatest.__doc__


def input_file_name() -> Column:
    return _invoke_function("input_file_name")


input_file_name.__doc__ = pysparkfuncs.input_file_name.__doc__


def least(*cols: "ColumnOrName") -> Column:
    if len(cols) < 2:
        raise PySparkValueError(
            error_class="WRONG_NUM_COLUMNS",
            message_parameters={"func_name": "least", "num_cols": "2"},
        )
    return _invoke_function_over_columns("least", *cols)


least.__doc__ = pysparkfuncs.least.__doc__


def isnan(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("isnan", col)


isnan.__doc__ = pysparkfuncs.isnan.__doc__


def isnull(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("isnull", col)


isnull.__doc__ = pysparkfuncs.isnull.__doc__


def monotonically_increasing_id() -> Column:
    return _invoke_function("monotonically_increasing_id")


monotonically_increasing_id.__doc__ = pysparkfuncs.monotonically_increasing_id.__doc__


def nanvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("nanvl", col1, col2)


nanvl.__doc__ = pysparkfuncs.nanvl.__doc__


def rand(seed: Optional[int] = None) -> Column:
    if seed is not None:
        return _invoke_function("rand", lit(seed))
    else:
        return _invoke_function("rand")


rand.__doc__ = pysparkfuncs.rand.__doc__


def randn(seed: Optional[int] = None) -> Column:
    if seed is not None:
        return _invoke_function("randn", lit(seed))
    else:
        return _invoke_function("randn")


randn.__doc__ = pysparkfuncs.randn.__doc__


def spark_partition_id() -> Column:
    return _invoke_function("spark_partition_id")


spark_partition_id.__doc__ = pysparkfuncs.spark_partition_id.__doc__


def when(condition: Column, value: Any) -> Column:
    # Explicitly not using ColumnOrName type here to make reading condition less opaque
    if not isinstance(condition, Column):
        raise PySparkTypeError(
            error_class="NOT_COLUMN",
            message_parameters={"arg_name": "condition", "arg_type": type(condition).__name__},
        )

    value_col = value if isinstance(value, Column) else lit(value)

    return Column(CaseWhen(branches=[(condition._expr, value_col._expr)], else_value=None))


when.__doc__ = pysparkfuncs.when.__doc__

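# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# when() builds a CaseWhen with a single branch and no else_value; chaining
# Column.otherwise() fills in the else branch:
#
#     when(col("x") > 1, "big").otherwise("small")
#     # CASE WHEN (x > 1) THEN 'big' ELSE 'small' END
#
# Passing a non-Column condition (e.g. a bare bool) raises NOT_COLUMN.
# ---------------------------------------------------------------------------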
# Sort Functions


def asc(col: "ColumnOrName") -> Column:
    return _to_col(col).asc()


asc.__doc__ = pysparkfuncs.asc.__doc__


def asc_nulls_first(col: "ColumnOrName") -> Column:
    return _to_col(col).asc_nulls_first()


asc_nulls_first.__doc__ = pysparkfuncs.asc_nulls_first.__doc__


def asc_nulls_last(col: "ColumnOrName") -> Column:
    return _to_col(col).asc_nulls_last()


asc_nulls_last.__doc__ = pysparkfuncs.asc_nulls_last.__doc__


def desc(col: "ColumnOrName") -> Column:
    return _to_col(col).desc()


desc.__doc__ = pysparkfuncs.desc.__doc__


def desc_nulls_first(col: "ColumnOrName") -> Column:
    return _to_col(col).desc_nulls_first()


desc_nulls_first.__doc__ = pysparkfuncs.desc_nulls_first.__doc__


def desc_nulls_last(col: "ColumnOrName") -> Column:
    return _to_col(col).desc_nulls_last()


desc_nulls_last.__doc__ = pysparkfuncs.desc_nulls_last.__doc__

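# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# The sort helpers are thin wrappers over the corresponding Column methods:
#
#     df.orderBy(desc_nulls_last("score"))
#     # equivalent to df.orderBy(col("score").desc_nulls_last())
# ---------------------------------------------------------------------------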
# Math Functions


def abs(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("abs", col)


abs.__doc__ = pysparkfuncs.abs.__doc__


def acos(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("acos", col)


acos.__doc__ = pysparkfuncs.acos.__doc__


def acosh(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("acosh", col)


acosh.__doc__ = pysparkfuncs.acosh.__doc__


def asin(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("asin", col)


asin.__doc__ = pysparkfuncs.asin.__doc__


def asinh(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("asinh", col)


asinh.__doc__ = pysparkfuncs.asinh.__doc__


def atan(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("atan", col)


atan.__doc__ = pysparkfuncs.atan.__doc__


def atan2(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
    return _invoke_binary_math_function("atan2", col1, col2)


atan2.__doc__ = pysparkfuncs.atan2.__doc__


def atanh(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("atanh", col)


atanh.__doc__ = pysparkfuncs.atanh.__doc__


def bin(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bin", col)


bin.__doc__ = pysparkfuncs.bin.__doc__


def bround(col: "ColumnOrName", scale: int = 0) -> Column:
    return _invoke_function("bround", _to_col(col), lit(scale))


bround.__doc__ = pysparkfuncs.bround.__doc__


def cbrt(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("cbrt", col)


cbrt.__doc__ = pysparkfuncs.cbrt.__doc__


def ceil(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ceil", col)


ceil.__doc__ = pysparkfuncs.ceil.__doc__


def ceiling(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ceiling", col)


ceiling.__doc__ = pysparkfuncs.ceiling.__doc__


def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column:
    return _invoke_function("conv", _to_col(col), lit(fromBase), lit(toBase))


conv.__doc__ = pysparkfuncs.conv.__doc__


def cos(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("cos", col)


cos.__doc__ = pysparkfuncs.cos.__doc__


def cosh(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("cosh", col)


cosh.__doc__ = pysparkfuncs.cosh.__doc__


def cot(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("cot", col)


cot.__doc__ = pysparkfuncs.cot.__doc__


def csc(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("csc", col)


csc.__doc__ = pysparkfuncs.csc.__doc__


def degrees(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("degrees", col)


degrees.__doc__ = pysparkfuncs.degrees.__doc__


def e() -> Column:
    return _invoke_function("e")


e.__doc__ = pysparkfuncs.e.__doc__


def exp(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("exp", col)


exp.__doc__ = pysparkfuncs.exp.__doc__


def expm1(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("expm1", col)


expm1.__doc__ = pysparkfuncs.expm1.__doc__


def factorial(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("factorial", col)


factorial.__doc__ = pysparkfuncs.factorial.__doc__


def floor(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("floor", col)


floor.__doc__ = pysparkfuncs.floor.__doc__


def hex(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("hex", col)


hex.__doc__ = pysparkfuncs.hex.__doc__


def hypot(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
    return _invoke_binary_math_function("hypot", col1, col2)


hypot.__doc__ = pysparkfuncs.hypot.__doc__


def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = None) -> Column:
    if arg2 is None:
        # in this case, arg1 should be "ColumnOrName"
        return _invoke_function("ln", _to_col(cast("ColumnOrName", arg1)))
    else:
        # in this case, arg1 should be a float
        return _invoke_function("log", lit(cast(float, arg1)), _to_col(arg2))


log.__doc__ = pysparkfuncs.log.__doc__

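# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# log() is overloaded by argument count:
#
#     log("x")        # natural log: server function "ln"(x)
#     log(2.0, "x")   # log base 2:  server function "log"(2.0, x)
# ---------------------------------------------------------------------------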
def log10(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("log10", col)


log10.__doc__ = pysparkfuncs.log10.__doc__


def log1p(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("log1p", col)


log1p.__doc__ = pysparkfuncs.log1p.__doc__


def ln(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ln", col)


ln.__doc__ = pysparkfuncs.ln.__doc__


def log2(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("log2", col)


log2.__doc__ = pysparkfuncs.log2.__doc__


def negative(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("negative", col)


negative.__doc__ = pysparkfuncs.negative.__doc__


negate = negative


def pi() -> Column:
    return _invoke_function("pi")


pi.__doc__ = pysparkfuncs.pi.__doc__


def positive(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("positive", col)


positive.__doc__ = pysparkfuncs.positive.__doc__


def pmod(dividend: Union["ColumnOrName", float], divisor: Union["ColumnOrName", float]) -> Column:
    return _invoke_binary_math_function("pmod", dividend, divisor)


pmod.__doc__ = pysparkfuncs.pmod.__doc__


def width_bucket(
    v: "ColumnOrName",
    min: "ColumnOrName",
    max: "ColumnOrName",
    numBucket: Union["ColumnOrName", int],
) -> Column:
    numBucket = lit(numBucket) if isinstance(numBucket, int) else numBucket
    return _invoke_function_over_columns("width_bucket", v, min, max, numBucket)


width_bucket.__doc__ = pysparkfuncs.width_bucket.__doc__

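# ---------------------------------------------------------------------------
# [Editor's note - illustrative sketch, not part of the packaged file.]
# width_bucket accepts the bucket count as a column or a plain int, which is
# wrapped with lit() first:
#
#     width_bucket(col("v"), lit(0.0), lit(100.0), 10)
#     # assigns v to one of 10 equal-width buckets over [0, 100)
# ---------------------------------------------------------------------------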
def pow(col1: Union["ColumnOrName", float], col2: Union["ColumnOrName", float]) -> Column:
|
|
744
|
+
return _invoke_binary_math_function("power", col1, col2)
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
pow.__doc__ = pysparkfuncs.pow.__doc__
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
def radians(col: "ColumnOrName") -> Column:
|
|
751
|
+
return _invoke_function_over_columns("radians", col)
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
radians.__doc__ = pysparkfuncs.radians.__doc__
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
def rint(col: "ColumnOrName") -> Column:
|
|
758
|
+
return _invoke_function_over_columns("rint", col)
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
rint.__doc__ = pysparkfuncs.rint.__doc__
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def round(col: "ColumnOrName", scale: int = 0) -> Column:
|
|
765
|
+
return _invoke_function("round", _to_col(col), lit(scale))
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
round.__doc__ = pysparkfuncs.round.__doc__
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def sec(col: "ColumnOrName") -> Column:
|
|
772
|
+
return _invoke_function_over_columns("sec", col)
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
sec.__doc__ = pysparkfuncs.sec.__doc__
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def shiftLeft(col: "ColumnOrName", numBits: int) -> Column:
|
|
779
|
+
warnings.warn("Deprecated in 3.4, use shiftleft instead.", FutureWarning)
|
|
780
|
+
return shiftleft(col, numBits)
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
shiftLeft.__doc__ = pysparkfuncs.shiftLeft.__doc__
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def shiftleft(col: "ColumnOrName", numBits: int) -> Column:
|
|
787
|
+
return _invoke_function("shiftleft", _to_col(col), lit(numBits))
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
shiftleft.__doc__ = pysparkfuncs.shiftleft.__doc__
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def shiftRight(col: "ColumnOrName", numBits: int) -> Column:
|
|
794
|
+
warnings.warn("Deprecated in 3.4, use shiftright instead.", FutureWarning)
|
|
795
|
+
return shiftright(col, numBits)
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
shiftRight.__doc__ = pysparkfuncs.shiftRight.__doc__
|
|
799
|
+
|
|
800
|
+
|
|
801
|
+
def shiftright(col: "ColumnOrName", numBits: int) -> Column:
|
|
802
|
+
return _invoke_function("shiftright", _to_col(col), lit(numBits))
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
shiftright.__doc__ = pysparkfuncs.shiftright.__doc__
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
def shiftRightUnsigned(col: "ColumnOrName", numBits: int) -> Column:
|
|
809
|
+
warnings.warn("Deprecated in 3.4, use shiftrightunsigned instead.", FutureWarning)
|
|
810
|
+
return shiftrightunsigned(col, numBits)
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
shiftRightUnsigned.__doc__ = pysparkfuncs.shiftRightUnsigned.__doc__
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
def shiftrightunsigned(col: "ColumnOrName", numBits: int) -> Column:
|
|
817
|
+
return _invoke_function("shiftrightunsigned", _to_col(col), lit(numBits))
|
|
818
|
+
|
|
819
|
+
|
|
820
|
+
shiftrightunsigned.__doc__ = pysparkfuncs.shiftrightunsigned.__doc__
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def signum(col: "ColumnOrName") -> Column:
|
|
824
|
+
return _invoke_function_over_columns("signum", col)
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
signum.__doc__ = pysparkfuncs.signum.__doc__
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def sign(col: "ColumnOrName") -> Column:
|
|
831
|
+
return _invoke_function_over_columns("sign", col)
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
sign.__doc__ = pysparkfuncs.sign.__doc__
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
def sin(col: "ColumnOrName") -> Column:
|
|
838
|
+
return _invoke_function_over_columns("sin", col)
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
sin.__doc__ = pysparkfuncs.sin.__doc__
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def sinh(col: "ColumnOrName") -> Column:
|
|
845
|
+
return _invoke_function_over_columns("sinh", col)
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
sinh.__doc__ = pysparkfuncs.sinh.__doc__
|
|
849
|
+
|
|
850
|
+
|
|
851
|
+
def sqrt(col: "ColumnOrName") -> Column:
|
|
852
|
+
return _invoke_function_over_columns("sqrt", col)
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
sqrt.__doc__ = pysparkfuncs.sqrt.__doc__
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
|
|
859
|
+
return _invoke_function_over_columns("try_add", left, right)
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
try_add.__doc__ = pysparkfuncs.try_add.__doc__
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def try_avg(col: "ColumnOrName") -> Column:
|
|
866
|
+
return _invoke_function_over_columns("try_avg", col)
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
try_avg.__doc__ = pysparkfuncs.try_avg.__doc__
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
|
|
873
|
+
return _invoke_function_over_columns("try_divide", left, right)
|
|
874
|
+
|
|
875
|
+
|
|
876
|
+
try_divide.__doc__ = pysparkfuncs.try_divide.__doc__
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column:
|
|
880
|
+
return _invoke_function_over_columns("try_multiply", left, right)
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
try_multiply.__doc__ = pysparkfuncs.try_multiply.__doc__
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column:
|
|
887
|
+
return _invoke_function_over_columns("try_subtract", left, right)
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
try_subtract.__doc__ = pysparkfuncs.try_subtract.__doc__
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
def try_sum(col: "ColumnOrName") -> Column:
|
|
894
|
+
return _invoke_function_over_columns("try_sum", col)
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
try_sum.__doc__ = pysparkfuncs.try_sum.__doc__
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def tan(col: "ColumnOrName") -> Column:
|
|
901
|
+
return _invoke_function_over_columns("tan", col)
|
|
902
|
+
|
|
903
|
+
|
|
904
|
+
tan.__doc__ = pysparkfuncs.tan.__doc__
|
|
905
|
+
|
|
906
|
+
|
|
907
|
+
def tanh(col: "ColumnOrName") -> Column:
|
|
908
|
+
return _invoke_function_over_columns("tanh", col)
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
tanh.__doc__ = pysparkfuncs.tanh.__doc__
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
def toDegrees(col: "ColumnOrName") -> Column:
|
|
915
|
+
warnings.warn("Deprecated in 3.4, use degrees instead.", FutureWarning)
|
|
916
|
+
return degrees(col)
|
|
917
|
+
|
|
918
|
+
|
|
919
|
+
toDegrees.__doc__ = pysparkfuncs.toDegrees.__doc__
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def toRadians(col: "ColumnOrName") -> Column:
|
|
923
|
+
warnings.warn("Deprecated in 3.4, use radians instead.", FutureWarning)
|
|
924
|
+
return radians(col)
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
toRadians.__doc__ = pysparkfuncs.toRadians.__doc__
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
def unhex(col: "ColumnOrName") -> Column:
|
|
931
|
+
return _invoke_function_over_columns("unhex", col)
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
unhex.__doc__ = pysparkfuncs.unhex.__doc__
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
def approxCountDistinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Column:
|
|
938
|
+
warnings.warn("Deprecated in 3.4, use approx_count_distinct instead.", FutureWarning)
|
|
939
|
+
return approx_count_distinct(col, rsd)
|
|
940
|
+
|
|
941
|
+
|
|
942
|
+
approxCountDistinct.__doc__ = pysparkfuncs.approxCountDistinct.__doc__
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> Column:
|
|
946
|
+
if rsd is None:
|
|
947
|
+
return _invoke_function("approx_count_distinct", _to_col(col))
|
|
948
|
+
else:
|
|
949
|
+
return _invoke_function("approx_count_distinct", _to_col(col), lit(rsd))
|
|
950
|
+
|
|
951
|
+
|
|
952
|
+
approx_count_distinct.__doc__ = pysparkfuncs.approx_count_distinct.__doc__


def avg(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("avg", col)


avg.__doc__ = pysparkfuncs.avg.__doc__


def collect_list(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("collect_list", col)


collect_list.__doc__ = pysparkfuncs.collect_list.__doc__


def array_agg(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_agg", col)


array_agg.__doc__ = pysparkfuncs.array_agg.__doc__


def collect_set(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("collect_set", col)


collect_set.__doc__ = pysparkfuncs.collect_set.__doc__


def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("corr", col1, col2)


corr.__doc__ = pysparkfuncs.corr.__doc__


def count(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("count", col)


count.__doc__ = pysparkfuncs.count.__doc__


def countDistinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column:
    return count_distinct(col, *cols)


countDistinct.__doc__ = pysparkfuncs.countDistinct.__doc__


def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column:
    _exprs = [_to_col(c)._expr for c in [col] + list(cols)]
    return Column(UnresolvedFunction("count", _exprs, is_distinct=True))


count_distinct.__doc__ = pysparkfuncs.count_distinct.__doc__
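# NOTE (editor's annotation): count_distinct cannot use the generic
# _invoke_function helpers because DISTINCT is a flag on the call rather than
# an argument, so it builds the Spark Connect expression directly with
# UnresolvedFunction("count", ..., is_distinct=True). Hypothetical usage:
#   df.select(count_distinct("dept", "role"))   # COUNT(DISTINCT dept, role)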


def covar_pop(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("covar_pop", col1, col2)


covar_pop.__doc__ = pysparkfuncs.covar_pop.__doc__


def covar_samp(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("covar_samp", col1, col2)


covar_samp.__doc__ = pysparkfuncs.covar_samp.__doc__


def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column:
    return _invoke_function("first", _to_col(col), lit(ignorenulls))


first.__doc__ = pysparkfuncs.first.__doc__


def grouping(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("grouping", col)


grouping.__doc__ = pysparkfuncs.grouping.__doc__


def grouping_id(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("grouping_id", *cols)


grouping_id.__doc__ = pysparkfuncs.grouping_id.__doc__


def count_min_sketch(
    col: "ColumnOrName",
    eps: "ColumnOrName",
    confidence: "ColumnOrName",
    seed: "ColumnOrName",
) -> Column:
    return _invoke_function_over_columns("count_min_sketch", col, eps, confidence, seed)


count_min_sketch.__doc__ = pysparkfuncs.count_min_sketch.__doc__


def kurtosis(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("kurtosis", col)


kurtosis.__doc__ = pysparkfuncs.kurtosis.__doc__


def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column:
    return _invoke_function("last", _to_col(col), lit(ignorenulls))


last.__doc__ = pysparkfuncs.last.__doc__


def max(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("max", col)


max.__doc__ = pysparkfuncs.max.__doc__


def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("max_by", col, ord)


max_by.__doc__ = pysparkfuncs.max_by.__doc__


def mean(col: "ColumnOrName") -> Column:
    return avg(col)


mean.__doc__ = pysparkfuncs.mean.__doc__


def median(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("median", col)


median.__doc__ = pysparkfuncs.median.__doc__


def min(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("min", col)


min.__doc__ = pysparkfuncs.min.__doc__


def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("min_by", col, ord)


min_by.__doc__ = pysparkfuncs.min_by.__doc__


def mode(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("mode", col)


mode.__doc__ = pysparkfuncs.mode.__doc__


def percentile(
    col: "ColumnOrName",
    percentage: Union[Column, float, List[float], Tuple[float]],
    frequency: Union[Column, int] = 1,
) -> Column:
    if isinstance(percentage, Column):
        _percentage = percentage
    elif isinstance(percentage, (list, tuple)):
        # Convert tuple to list
        _percentage = lit(list(percentage))
    else:
        # Probably scalar
        _percentage = lit(percentage)

    if isinstance(frequency, int):
        _frequency = lit(frequency)
    elif isinstance(frequency, Column):
        _frequency = frequency
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_INT",
            message_parameters={
                "arg_name": "frequency",
                "arg_type": type(frequency).__name__,
            },
        )

    return _invoke_function("percentile", _to_col(col), _percentage, _frequency)


percentile.__doc__ = pysparkfuncs.percentile.__doc__
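# NOTE (editor's annotation): percentile normalizes `percentage` to a Column
# before the call -- a Column passes through, a list/tuple becomes an array
# literal, and a bare scalar becomes a scalar literal -- so one call shape
# covers all three input forms. Hypothetical usage:
#   df.select(percentile("latency", 0.5))                 # one quantile
#   df.select(percentile("latency", [0.5, 0.95, 0.99]))   # array of quantiles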


def percentile_approx(
    col: "ColumnOrName",
    percentage: Union[Column, float, List[float], Tuple[float]],
    accuracy: Union[Column, float] = 10000,
) -> Column:
    if isinstance(percentage, Column):
        percentage_col = percentage
    elif isinstance(percentage, (list, tuple)):
        # Convert tuple to list
        percentage_col = lit(list(percentage))
    else:
        # Probably scalar
        percentage_col = lit(percentage)

    return _invoke_function("percentile_approx", _to_col(col), percentage_col, lit(accuracy))


percentile_approx.__doc__ = pysparkfuncs.percentile_approx.__doc__


def approx_percentile(
    col: "ColumnOrName",
    percentage: Union[Column, float, List[float], Tuple[float]],
    accuracy: Union[Column, float] = 10000,
) -> Column:
    if isinstance(percentage, Column):
        percentage_col = percentage
    elif isinstance(percentage, (list, tuple)):
        # Convert tuple to list
        percentage_col = lit(list(percentage))
    else:
        # Probably scalar
        percentage_col = lit(percentage)

    return _invoke_function("approx_percentile", _to_col(col), percentage_col, lit(accuracy))


approx_percentile.__doc__ = pysparkfuncs.approx_percentile.__doc__


def product(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("product", col)


product.__doc__ = pysparkfuncs.product.__doc__


def skewness(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("skewness", col)


skewness.__doc__ = pysparkfuncs.skewness.__doc__


def stddev(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("stddev", col)


stddev.__doc__ = pysparkfuncs.stddev.__doc__


def std(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("std", col)


std.__doc__ = pysparkfuncs.std.__doc__


def stddev_samp(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("stddev_samp", col)


stddev_samp.__doc__ = pysparkfuncs.stddev_samp.__doc__


def stddev_pop(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("stddev_pop", col)


stddev_pop.__doc__ = pysparkfuncs.stddev_pop.__doc__


def sum(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("sum", col)


sum.__doc__ = pysparkfuncs.sum.__doc__


def sumDistinct(col: "ColumnOrName") -> Column:
    warnings.warn("Deprecated in 3.4, use sum_distinct instead.", FutureWarning)
    return sum_distinct(col)


sumDistinct.__doc__ = pysparkfuncs.sumDistinct.__doc__


def sum_distinct(col: "ColumnOrName") -> Column:
    return Column(UnresolvedFunction("sum", [_to_col(col)._expr], is_distinct=True))


sum_distinct.__doc__ = pysparkfuncs.sum_distinct.__doc__


def var_pop(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("var_pop", col)


var_pop.__doc__ = pysparkfuncs.var_pop.__doc__


def regr_avgx(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_avgx", y, x)


regr_avgx.__doc__ = pysparkfuncs.regr_avgx.__doc__


def regr_avgy(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_avgy", y, x)


regr_avgy.__doc__ = pysparkfuncs.regr_avgy.__doc__


def regr_count(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_count", y, x)


regr_count.__doc__ = pysparkfuncs.regr_count.__doc__


def regr_intercept(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_intercept", y, x)


regr_intercept.__doc__ = pysparkfuncs.regr_intercept.__doc__


def regr_r2(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_r2", y, x)


regr_r2.__doc__ = pysparkfuncs.regr_r2.__doc__


def regr_slope(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_slope", y, x)


regr_slope.__doc__ = pysparkfuncs.regr_slope.__doc__


def regr_sxx(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_sxx", y, x)


regr_sxx.__doc__ = pysparkfuncs.regr_sxx.__doc__


def regr_sxy(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_sxy", y, x)


regr_sxy.__doc__ = pysparkfuncs.regr_sxy.__doc__


def regr_syy(y: "ColumnOrName", x: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regr_syy", y, x)


regr_syy.__doc__ = pysparkfuncs.regr_syy.__doc__


def var_samp(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("var_samp", col)


var_samp.__doc__ = pysparkfuncs.var_samp.__doc__


def variance(col: "ColumnOrName") -> Column:
    return var_samp(col)


variance.__doc__ = pysparkfuncs.variance.__doc__


def every(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("every", col)


every.__doc__ = pysparkfuncs.every.__doc__


def bool_and(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bool_and", col)


bool_and.__doc__ = pysparkfuncs.bool_and.__doc__


def some(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("some", col)


some.__doc__ = pysparkfuncs.some.__doc__


def bool_or(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bool_or", col)


bool_or.__doc__ = pysparkfuncs.bool_or.__doc__


def bit_and(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bit_and", col)


bit_and.__doc__ = pysparkfuncs.bit_and.__doc__


def bit_or(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bit_or", col)


bit_or.__doc__ = pysparkfuncs.bit_or.__doc__


def bit_xor(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bit_xor", col)


bit_xor.__doc__ = pysparkfuncs.bit_xor.__doc__


# Window Functions


def cume_dist() -> Column:
    return _invoke_function("cume_dist")


cume_dist.__doc__ = pysparkfuncs.cume_dist.__doc__


def dense_rank() -> Column:
    return _invoke_function("dense_rank")


dense_rank.__doc__ = pysparkfuncs.dense_rank.__doc__


def lag(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column:
    if default is None:
        return _invoke_function("lag", _to_col(col), lit(offset))
    else:
        return _invoke_function("lag", _to_col(col), lit(offset), lit(default))


lag.__doc__ = pysparkfuncs.lag.__doc__
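# NOTE (editor's annotation): lag appends the `default` literal only when one
# is given, so the two-argument form falls back to the server-side NULL
# default. Hypothetical usage over a window spec `w` (e.g. built with
# Window.partitionBy("k").orderBy("t")):
#   df.select(lag("value").over(w))         # previous row, NULL at the edge
#   df.select(lag("value", 2, 0).over(w))   # two rows back, 0 at the edge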


def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> Column:
    if default is None:
        return _invoke_function("lead", _to_col(col), lit(offset))
    else:
        return _invoke_function("lead", _to_col(col), lit(offset), lit(default))


lead.__doc__ = pysparkfuncs.lead.__doc__


def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = None) -> Column:
    if ignoreNulls is None:
        return _invoke_function("nth_value", _to_col(col), lit(offset))
    else:
        return _invoke_function("nth_value", _to_col(col), lit(offset), lit(ignoreNulls))


nth_value.__doc__ = pysparkfuncs.nth_value.__doc__


def any_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column:
    if ignoreNulls is None:
        return _invoke_function_over_columns("any_value", col)
    else:
        ignoreNulls = lit(ignoreNulls) if isinstance(ignoreNulls, bool) else ignoreNulls
        return _invoke_function_over_columns("any_value", col, ignoreNulls)


any_value.__doc__ = pysparkfuncs.any_value.__doc__


def first_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column:
    if ignoreNulls is None:
        return _invoke_function_over_columns("first_value", col)
    else:
        ignoreNulls = lit(ignoreNulls) if isinstance(ignoreNulls, bool) else ignoreNulls
        return _invoke_function_over_columns("first_value", col, ignoreNulls)


first_value.__doc__ = pysparkfuncs.first_value.__doc__


def last_value(col: "ColumnOrName", ignoreNulls: Optional[Union[bool, Column]] = None) -> Column:
    if ignoreNulls is None:
        return _invoke_function_over_columns("last_value", col)
    else:
        ignoreNulls = lit(ignoreNulls) if isinstance(ignoreNulls, bool) else ignoreNulls
        return _invoke_function_over_columns("last_value", col, ignoreNulls)


last_value.__doc__ = pysparkfuncs.last_value.__doc__


def count_if(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("count_if", col)


count_if.__doc__ = pysparkfuncs.count_if.__doc__


def histogram_numeric(col: "ColumnOrName", nBins: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("histogram_numeric", col, nBins)


histogram_numeric.__doc__ = pysparkfuncs.histogram_numeric.__doc__


def ntile(n: int) -> Column:
    return _invoke_function("ntile", lit(n))


ntile.__doc__ = pysparkfuncs.ntile.__doc__


def percent_rank() -> Column:
    return _invoke_function("percent_rank")


percent_rank.__doc__ = pysparkfuncs.percent_rank.__doc__


def rank() -> Column:
    return _invoke_function("rank")


rank.__doc__ = pysparkfuncs.rank.__doc__


def row_number() -> Column:
    return _invoke_function("row_number")


row_number.__doc__ = pysparkfuncs.row_number.__doc__


def aggregate(
    col: "ColumnOrName",
    initialValue: "ColumnOrName",
    merge: Callable[[Column, Column], Column],
    finish: Optional[Callable[[Column], Column]] = None,
) -> Column:
    if finish is not None:
        return _invoke_higher_order_function("aggregate", [col, initialValue], [merge, finish])

    else:
        return _invoke_higher_order_function("aggregate", [col, initialValue], [merge])


aggregate.__doc__ = pysparkfuncs.aggregate.__doc__
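# NOTE (editor's annotation): aggregate hands its Python callables to
# _invoke_higher_order_function, which converts them into lambda expressions
# the server can evaluate; `finish` is forwarded only when supplied.
# Hypothetical usage over an array column `xs`:
#   df.select(aggregate("xs", lit(0), lambda acc, x: acc + x))        # fold/sum
#   df.select(aggregate("xs", lit(0), lambda acc, x: acc + x,
#                       lambda total: total * 2))                     # with finish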


def reduce(
    col: "ColumnOrName",
    initialValue: "ColumnOrName",
    merge: Callable[[Column, Column], Column],
    finish: Optional[Callable[[Column], Column]] = None,
) -> Column:
    if finish is not None:
        return _invoke_higher_order_function("reduce", [col, initialValue], [merge, finish])

    else:
        return _invoke_higher_order_function("reduce", [col, initialValue], [merge])


reduce.__doc__ = pysparkfuncs.reduce.__doc__


def array(*cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]) -> Column:
    if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
        cols = cols[0]  # type: ignore[assignment]
    return _invoke_function_over_columns("array", *cols)  # type: ignore[arg-type]


array.__doc__ = pysparkfuncs.array.__doc__
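# NOTE (editor's annotation): array (like create_map, struct and map_concat
# below) accepts either varargs or a single list/set/tuple of columns and
# unpacks the latter, so both call shapes produce the same expression.
# Hypothetical usage:
#   df.select(array("a", "b", "c"))
#   df.select(array(["a", "b", "c"]))   # equivalent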


def array_append(col: "ColumnOrName", value: Any) -> Column:
    return _invoke_function("array_append", _to_col(col), lit(value))


array_append.__doc__ = pysparkfuncs.array_append.__doc__


def array_contains(col: "ColumnOrName", value: Any) -> Column:
    return _invoke_function("array_contains", _to_col(col), lit(value))


array_contains.__doc__ = pysparkfuncs.array_contains.__doc__


def array_distinct(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_distinct", col)


array_distinct.__doc__ = pysparkfuncs.array_distinct.__doc__


def array_except(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_except", col1, col2)


array_except.__doc__ = pysparkfuncs.array_except.__doc__


def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: Any) -> Column:
    _pos = lit(pos) if isinstance(pos, int) else _to_col(pos)
    return _invoke_function("array_insert", _to_col(arr), _pos, lit(value))


array_insert.__doc__ = pysparkfuncs.array_insert.__doc__


def array_intersect(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_intersect", col1, col2)


array_intersect.__doc__ = pysparkfuncs.array_intersect.__doc__


def array_compact(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_compact", col)


array_compact.__doc__ = pysparkfuncs.array_compact.__doc__


def array_join(
    col: "ColumnOrName", delimiter: str, null_replacement: Optional[str] = None
) -> Column:
    if null_replacement is None:
        return _invoke_function("array_join", _to_col(col), lit(delimiter))
    else:
        return _invoke_function("array_join", _to_col(col), lit(delimiter), lit(null_replacement))


array_join.__doc__ = pysparkfuncs.array_join.__doc__


def array_max(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_max", col)


array_max.__doc__ = pysparkfuncs.array_max.__doc__


def array_min(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_min", col)


array_min.__doc__ = pysparkfuncs.array_min.__doc__


def array_size(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_size", col)


array_size.__doc__ = pysparkfuncs.array_size.__doc__


def cardinality(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("cardinality", col)


cardinality.__doc__ = pysparkfuncs.cardinality.__doc__


def array_position(col: "ColumnOrName", value: Any) -> Column:
    return _invoke_function("array_position", _to_col(col), lit(value))


array_position.__doc__ = pysparkfuncs.array_position.__doc__


def array_prepend(col: "ColumnOrName", value: Any) -> Column:
    return _invoke_function("array_prepend", _to_col(col), lit(value))


array_prepend.__doc__ = pysparkfuncs.array_prepend.__doc__


def array_remove(col: "ColumnOrName", element: Any) -> Column:
    return _invoke_function("array_remove", _to_col(col), lit(element))


array_remove.__doc__ = pysparkfuncs.array_remove.__doc__


def array_repeat(col: "ColumnOrName", count: Union["ColumnOrName", int]) -> Column:
    _count = lit(count) if isinstance(count, int) else _to_col(count)
    return _invoke_function("array_repeat", _to_col(col), _count)


array_repeat.__doc__ = pysparkfuncs.array_repeat.__doc__


def array_sort(
    col: "ColumnOrName", comparator: Optional[Callable[[Column, Column], Column]] = None
) -> Column:
    if comparator is None:
        return _invoke_function_over_columns("array_sort", col)
    else:
        return _invoke_higher_order_function("array_sort", [col], [comparator])


array_sort.__doc__ = pysparkfuncs.array_sort.__doc__


def array_union(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("array_union", col1, col2)


array_union.__doc__ = pysparkfuncs.array_union.__doc__


def arrays_overlap(a1: "ColumnOrName", a2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("arrays_overlap", a1, a2)


arrays_overlap.__doc__ = pysparkfuncs.arrays_overlap.__doc__


def arrays_zip(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("arrays_zip", *cols)


arrays_zip.__doc__ = pysparkfuncs.arrays_zip.__doc__


def concat(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("concat", *cols)


concat.__doc__ = pysparkfuncs.concat.__doc__


def create_map(
    *cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]
) -> Column:
    if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
        cols = cols[0]  # type: ignore[assignment]
    return _invoke_function_over_columns("map", *cols)  # type: ignore[arg-type]


create_map.__doc__ = pysparkfuncs.create_map.__doc__


def element_at(col: "ColumnOrName", extraction: Any) -> Column:
    return _invoke_function("element_at", _to_col(col), lit(extraction))


element_at.__doc__ = pysparkfuncs.element_at.__doc__


def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("try_element_at", col, extraction)


try_element_at.__doc__ = pysparkfuncs.try_element_at.__doc__


def exists(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column:
    return _invoke_higher_order_function("exists", [col], [f])


exists.__doc__ = pysparkfuncs.exists.__doc__


def explode(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("explode", col)


explode.__doc__ = pysparkfuncs.explode.__doc__


def explode_outer(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("explode_outer", col)


explode_outer.__doc__ = pysparkfuncs.explode_outer.__doc__


def filter(
    col: "ColumnOrName",
    f: Union[Callable[[Column], Column], Callable[[Column, Column], Column]],
) -> Column:
    return _invoke_higher_order_function("filter", [col], [f])


filter.__doc__ = pysparkfuncs.filter.__doc__


def flatten(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("flatten", col)


flatten.__doc__ = pysparkfuncs.flatten.__doc__


def forall(col: "ColumnOrName", f: Callable[[Column], Column]) -> Column:
    return _invoke_higher_order_function("forall", [col], [f])


forall.__doc__ = pysparkfuncs.forall.__doc__


# TODO: support options
def from_csv(
    col: "ColumnOrName",
    schema: Union[Column, str],
    options: Optional[Dict[str, str]] = None,
) -> Column:
    if isinstance(schema, Column):
        _schema = schema
    elif isinstance(schema, str):
        _schema = lit(schema)
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_STR",
            message_parameters={"arg_name": "schema", "arg_type": type(schema).__name__},
        )

    if options is None:
        return _invoke_function("from_csv", _to_col(col), _schema)
    else:
        return _invoke_function("from_csv", _to_col(col), _schema, _options_to_col(options))


from_csv.__doc__ = pysparkfuncs.from_csv.__doc__


def from_json(
    col: "ColumnOrName",
    schema: Union[ArrayType, StructType, Column, str],
    options: Optional[Dict[str, str]] = None,
) -> Column:
    if isinstance(schema, Column):
        _schema = schema
    elif isinstance(schema, DataType):
        _schema = lit(schema.json())
    elif isinstance(schema, str):
        _schema = lit(schema)
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_DATATYPE_OR_STR",
            message_parameters={"arg_name": "schema", "arg_type": type(schema).__name__},
        )

    if options is None:
        return _invoke_function("from_json", _to_col(col), _schema)
    else:
        return _invoke_function("from_json", _to_col(col), _schema, _options_to_col(options))


from_json.__doc__ = pysparkfuncs.from_json.__doc__
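# NOTE (editor's annotation): from_json normalizes `schema` to a literal -- a
# DataType is serialized with .json(), a string (DDL or JSON form) passes
# through, and a Column is used as-is; anything else raises PySparkTypeError
# before a request is built. Hypothetical usage (type names from
# pyspark.sql.types):
#   df.select(from_json("payload", "a INT, b STRING"))
#   df.select(from_json("payload", StructType([StructField("a", IntegerType())])))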


def get(col: "ColumnOrName", index: Union["ColumnOrName", int]) -> Column:
    index = lit(index) if isinstance(index, int) else index

    return _invoke_function_over_columns("get", col, index)


get.__doc__ = pysparkfuncs.get.__doc__


def get_json_object(col: "ColumnOrName", path: str) -> Column:
    return _invoke_function("get_json_object", _to_col(col), lit(path))


get_json_object.__doc__ = pysparkfuncs.get_json_object.__doc__


def json_array_length(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("json_array_length", col)


json_array_length.__doc__ = pysparkfuncs.json_array_length.__doc__


def json_object_keys(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("json_object_keys", col)


json_object_keys.__doc__ = pysparkfuncs.json_object_keys.__doc__


def inline(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("inline", col)


inline.__doc__ = pysparkfuncs.inline.__doc__


def inline_outer(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("inline_outer", col)


inline_outer.__doc__ = pysparkfuncs.inline_outer.__doc__


def json_tuple(col: "ColumnOrName", *fields: str) -> Column:
    return _invoke_function("json_tuple", _to_col(col), *[lit(field) for field in fields])


json_tuple.__doc__ = pysparkfuncs.json_tuple.__doc__


def map_concat(
    *cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]
) -> Column:
    if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
        cols = cols[0]  # type: ignore[assignment]
    return _invoke_function_over_columns("map_concat", *cols)  # type: ignore[arg-type]


map_concat.__doc__ = pysparkfuncs.map_concat.__doc__


def map_contains_key(col: "ColumnOrName", value: Any) -> Column:
    return array_contains(map_keys(col), lit(value))


map_contains_key.__doc__ = pysparkfuncs.map_contains_key.__doc__


def map_entries(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("map_entries", col)


map_entries.__doc__ = pysparkfuncs.map_entries.__doc__


def map_filter(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column:
    return _invoke_higher_order_function("map_filter", [col], [f])


map_filter.__doc__ = pysparkfuncs.map_filter.__doc__


def map_from_arrays(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("map_from_arrays", col1, col2)


map_from_arrays.__doc__ = pysparkfuncs.map_from_arrays.__doc__


def map_from_entries(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("map_from_entries", col)


map_from_entries.__doc__ = pysparkfuncs.map_from_entries.__doc__


def map_keys(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("map_keys", col)


map_keys.__doc__ = pysparkfuncs.map_keys.__doc__


def map_values(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("map_values", col)


map_values.__doc__ = pysparkfuncs.map_values.__doc__


def map_zip_with(
    col1: "ColumnOrName",
    col2: "ColumnOrName",
    f: Callable[[Column, Column, Column], Column],
) -> Column:
    return _invoke_higher_order_function("map_zip_with", [col1, col2], [f])


map_zip_with.__doc__ = pysparkfuncs.map_zip_with.__doc__


def str_to_map(
    text: "ColumnOrName",
    pairDelim: Optional["ColumnOrName"] = None,
    keyValueDelim: Optional["ColumnOrName"] = None,
) -> Column:
    _pairDelim = lit(",") if pairDelim is None else _to_col(pairDelim)
    _keyValueDelim = lit(":") if keyValueDelim is None else _to_col(keyValueDelim)

    return _invoke_function("str_to_map", _to_col(text), _pairDelim, _keyValueDelim)


str_to_map.__doc__ = pysparkfuncs.str_to_map.__doc__
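# NOTE (editor's annotation): str_to_map substitutes the defaults itself when
# the delimiters are omitted -- lit(",") between pairs and lit(":") between key
# and value -- so the server always receives three arguments. Hypothetical usage:
#   df.select(str_to_map(lit("a:1,b:2")))                     # {a -> 1, b -> 2}
#   df.select(str_to_map(lit("a=1;b=2"), lit(";"), lit("=")))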


def posexplode(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("posexplode", col)


posexplode.__doc__ = pysparkfuncs.posexplode.__doc__


def posexplode_outer(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("posexplode_outer", col)


posexplode_outer.__doc__ = pysparkfuncs.posexplode_outer.__doc__


def reverse(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("reverse", col)


reverse.__doc__ = pysparkfuncs.reverse.__doc__


def sequence(
    start: "ColumnOrName", stop: "ColumnOrName", step: Optional["ColumnOrName"] = None
) -> Column:
    if step is None:
        return _invoke_function_over_columns("sequence", start, stop)
    else:
        return _invoke_function_over_columns("sequence", start, stop, step)


sequence.__doc__ = pysparkfuncs.sequence.__doc__


def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
    if isinstance(csv, Column):
        _csv = csv
    elif isinstance(csv, str):
        _csv = lit(csv)
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_STR",
            message_parameters={"arg_name": "csv", "arg_type": type(csv).__name__},
        )

    if options is None:
        return _invoke_function("schema_of_csv", _csv)
    else:
        return _invoke_function("schema_of_csv", _csv, _options_to_col(options))


schema_of_csv.__doc__ = pysparkfuncs.schema_of_csv.__doc__


def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
    if isinstance(json, Column):
        _json = json
    elif isinstance(json, str):
        _json = lit(json)
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_STR",
            message_parameters={"arg_name": "json", "arg_type": type(json).__name__},
        )

    if options is None:
        return _invoke_function("schema_of_json", _json)
    else:
        return _invoke_function("schema_of_json", _json, _options_to_col(options))


schema_of_json.__doc__ = pysparkfuncs.schema_of_json.__doc__


def shuffle(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("shuffle", col)


shuffle.__doc__ = pysparkfuncs.shuffle.__doc__


def size(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("size", col)


size.__doc__ = pysparkfuncs.size.__doc__


def slice(
    col: "ColumnOrName", start: Union["ColumnOrName", int], length: Union["ColumnOrName", int]
) -> Column:
    if isinstance(start, (Column, str)):
        _start = start
    elif isinstance(start, int):
        _start = lit(start)
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_INT_OR_STR",
            message_parameters={"arg_name": "start", "arg_type": type(start).__name__},
        )

    if isinstance(length, (Column, str)):
        _length = length
    elif isinstance(length, int):
        _length = lit(length)
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_INT_OR_STR",
            message_parameters={"arg_name": "length", "arg_type": type(length).__name__},
        )

    return _invoke_function_over_columns("slice", col, _start, _length)


slice.__doc__ = pysparkfuncs.slice.__doc__
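# NOTE (editor's annotation): slice accepts `start` and `length` as ints,
# column-name strings, or Columns; ints are wrapped in lit(), and any other
# type raises PySparkTypeError before the request is built. Hypothetical usage:
#   df.select(slice("xs", 2, 3))                    # elements 2..4 (1-based)
#   df.select(slice("xs", "start_col", "len_col"))  # data-driven bounds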


def sort_array(col: "ColumnOrName", asc: bool = True) -> Column:
    return _invoke_function("sort_array", _to_col(col), lit(asc))


sort_array.__doc__ = pysparkfuncs.sort_array.__doc__


def struct(
    *cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]
) -> Column:
    if len(cols) == 1 and isinstance(cols[0], (list, set, tuple)):
        cols = cols[0]  # type: ignore[assignment]
    return _invoke_function_over_columns("struct", *cols)  # type: ignore[arg-type]


struct.__doc__ = pysparkfuncs.struct.__doc__


def named_struct(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("named_struct", *cols)


named_struct.__doc__ = pysparkfuncs.named_struct.__doc__


def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
    if options is None:
        return _invoke_function("to_csv", _to_col(col))
    else:
        return _invoke_function("to_csv", _to_col(col), _options_to_col(options))


to_csv.__doc__ = pysparkfuncs.to_csv.__doc__


def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column:
    if options is None:
        return _invoke_function("to_json", _to_col(col))
    else:
        return _invoke_function("to_json", _to_col(col), _options_to_col(options))


to_json.__doc__ = pysparkfuncs.to_json.__doc__


def transform(
    col: "ColumnOrName",
    f: Union[Callable[[Column], Column], Callable[[Column, Column], Column]],
) -> Column:
    return _invoke_higher_order_function("transform", [col], [f])


transform.__doc__ = pysparkfuncs.transform.__doc__


def transform_keys(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column:
    return _invoke_higher_order_function("transform_keys", [col], [f])


transform_keys.__doc__ = pysparkfuncs.transform_keys.__doc__


def transform_values(col: "ColumnOrName", f: Callable[[Column, Column], Column]) -> Column:
    return _invoke_higher_order_function("transform_values", [col], [f])


transform_values.__doc__ = pysparkfuncs.transform_values.__doc__


def zip_with(
    left: "ColumnOrName",
    right: "ColumnOrName",
    f: Callable[[Column, Column], Column],
) -> Column:
    return _invoke_higher_order_function("zip_with", [left, right], [f])


zip_with.__doc__ = pysparkfuncs.zip_with.__doc__
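# NOTE (editor's annotation): transform, transform_keys, transform_values and
# zip_with all delegate to _invoke_higher_order_function with the Python
# callables passed through unchanged, so the lambda arity must match what the
# server-side function expects. Hypothetical usage:
#   df.select(zip_with("xs", "ys", lambda x, y: x + y))
#   df.select(transform("xs", lambda x: x * 2))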


# String/Binary functions


def upper(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("upper", col)


upper.__doc__ = pysparkfuncs.upper.__doc__


def lower(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("lower", col)


lower.__doc__ = pysparkfuncs.lower.__doc__


def ascii(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ascii", col)


ascii.__doc__ = pysparkfuncs.ascii.__doc__


def base64(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("base64", col)


base64.__doc__ = pysparkfuncs.base64.__doc__


def unbase64(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("unbase64", col)


unbase64.__doc__ = pysparkfuncs.unbase64.__doc__


def ltrim(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ltrim", col)


ltrim.__doc__ = pysparkfuncs.ltrim.__doc__


def rtrim(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("rtrim", col)


rtrim.__doc__ = pysparkfuncs.rtrim.__doc__


def trim(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("trim", col)


trim.__doc__ = pysparkfuncs.trim.__doc__


def concat_ws(sep: str, *cols: "ColumnOrName") -> Column:
    return _invoke_function("concat_ws", lit(sep), *[_to_col(c) for c in cols])


concat_ws.__doc__ = pysparkfuncs.concat_ws.__doc__


def decode(col: "ColumnOrName", charset: str) -> Column:
    return _invoke_function("decode", _to_col(col), lit(charset))


decode.__doc__ = pysparkfuncs.decode.__doc__


def encode(col: "ColumnOrName", charset: str) -> Column:
    return _invoke_function("encode", _to_col(col), lit(charset))


encode.__doc__ = pysparkfuncs.encode.__doc__


def format_number(col: "ColumnOrName", d: int) -> Column:
    return _invoke_function("format_number", _to_col(col), lit(d))


format_number.__doc__ = pysparkfuncs.format_number.__doc__


def format_string(format: str, *cols: "ColumnOrName") -> Column:
    return _invoke_function("format_string", lit(format), *[_to_col(c) for c in cols])


format_string.__doc__ = pysparkfuncs.format_string.__doc__


def instr(str: "ColumnOrName", substr: str) -> Column:
    return _invoke_function("instr", _to_col(str), lit(substr))


instr.__doc__ = pysparkfuncs.instr.__doc__


def overlay(
    src: "ColumnOrName",
    replace: "ColumnOrName",
    pos: Union["ColumnOrName", int],
    len: Union["ColumnOrName", int] = -1,
) -> Column:
    if not isinstance(pos, (int, str, Column)):
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_INT_OR_STR",
            message_parameters={"arg_name": "pos", "arg_type": type(pos).__name__},
        )
    if len is not None and not isinstance(len, (int, str, Column)):
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_INT_OR_STR",
            message_parameters={"arg_name": "len", "arg_type": type(len).__name__},
        )

    if isinstance(pos, int):
        pos = lit(pos)
    if isinstance(len, int):
        len = lit(len)

    return _invoke_function_over_columns("overlay", src, replace, pos, len)


overlay.__doc__ = pysparkfuncs.overlay.__doc__
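# NOTE (editor's annotation): overlay validates `pos` and `len` up front (int,
# column-name string, or Column; anything else raises PySparkTypeError) and
# wraps ints in lit() before the call. The default len = -1 is, to the best of
# the editor's understanding, Spark's "use the full length of the replacement"
# sentinel. Hypothetical usage:
#   df.select(overlay("body", lit("XXX"), 7))   # splice at position 7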


def sentences(
    string: "ColumnOrName",
    language: Optional["ColumnOrName"] = None,
    country: Optional["ColumnOrName"] = None,
) -> Column:
    _language = lit("") if language is None else _to_col(language)
    _country = lit("") if country is None else _to_col(country)

    return _invoke_function("sentences", _to_col(string), _language, _country)


sentences.__doc__ = pysparkfuncs.sentences.__doc__


def substring(str: "ColumnOrName", pos: int, len: int) -> Column:
    return _invoke_function("substring", _to_col(str), lit(pos), lit(len))


substring.__doc__ = pysparkfuncs.substring.__doc__


def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column:
    return _invoke_function("substring_index", _to_col(str), lit(delim), lit(count))


substring_index.__doc__ = pysparkfuncs.substring_index.__doc__


def levenshtein(
    left: "ColumnOrName", right: "ColumnOrName", threshold: Optional[int] = None
) -> Column:
    if threshold is None:
        return _invoke_function_over_columns("levenshtein", left, right)
    else:
        return _invoke_function("levenshtein", _to_col(left), _to_col(right), lit(threshold))


levenshtein.__doc__ = pysparkfuncs.levenshtein.__doc__


def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column:
    return _invoke_function("locate", lit(substr), _to_col(str), lit(pos))


locate.__doc__ = pysparkfuncs.locate.__doc__


def lpad(col: "ColumnOrName", len: int, pad: str) -> Column:
    return _invoke_function("lpad", _to_col(col), lit(len), lit(pad))


lpad.__doc__ = pysparkfuncs.lpad.__doc__


def rpad(col: "ColumnOrName", len: int, pad: str) -> Column:
    return _invoke_function("rpad", _to_col(col), lit(len), lit(pad))


rpad.__doc__ = pysparkfuncs.rpad.__doc__


def repeat(col: "ColumnOrName", n: int) -> Column:
    return _invoke_function("repeat", _to_col(col), lit(n))


repeat.__doc__ = pysparkfuncs.repeat.__doc__


def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column:
    return _invoke_function("split", _to_col(str), lit(pattern), lit(limit))


split.__doc__ = pysparkfuncs.split.__doc__


def rlike(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("rlike", str, regexp)


rlike.__doc__ = pysparkfuncs.rlike.__doc__


def regexp(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regexp", str, regexp)


regexp.__doc__ = pysparkfuncs.regexp.__doc__


def regexp_like(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regexp_like", str, regexp)


regexp_like.__doc__ = pysparkfuncs.regexp_like.__doc__


def regexp_count(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("regexp_count", str, regexp)


regexp_count.__doc__ = pysparkfuncs.regexp_count.__doc__


def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column:
    return _invoke_function("regexp_extract", _to_col(str), lit(pattern), lit(idx))


regexp_extract.__doc__ = pysparkfuncs.regexp_extract.__doc__


def regexp_extract_all(
    str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None
) -> Column:
    if idx is None:
        return _invoke_function_over_columns("regexp_extract_all", str, regexp)
    else:
        if isinstance(idx, int):
            idx = lit(idx)
        return _invoke_function_over_columns("regexp_extract_all", str, regexp, idx)


regexp_extract_all.__doc__ = pysparkfuncs.regexp_extract_all.__doc__


def regexp_replace(
    string: "ColumnOrName", pattern: Union[str, Column], replacement: Union[str, Column]
) -> Column:
    if isinstance(pattern, str):
        pattern = lit(pattern)

    if isinstance(replacement, str):
        replacement = lit(replacement)

    return _invoke_function("regexp_replace", _to_col(string), pattern, replacement)


regexp_replace.__doc__ = pysparkfuncs.regexp_replace.__doc__
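# NOTE (editor's annotation): regexp_replace is typical of the string helpers
# here -- plain Python str arguments are promoted to Columns with lit() so the
# final _invoke_function call only ever sees Column expressions. Hypothetical
# usage:
#   df.select(regexp_replace("phone", r"[^0-9]", ""))   # strip non-digits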
|
|
2433
|
+
|
|
2434
|
+
|
|
2435
|
+
def regexp_substr(str: "ColumnOrName", regexp: "ColumnOrName") -> Column:
|
|
2436
|
+
return _invoke_function_over_columns("regexp_substr", str, regexp)
|
|
2437
|
+
|
|
2438
|
+
|
|
2439
|
+
regexp_substr.__doc__ = pysparkfuncs.regexp_substr.__doc__
|
|
2440
|
+
|
|
2441
|
+
|
|
2442
|
+
def regexp_instr(
|
|
2443
|
+
str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None
|
|
2444
|
+
) -> Column:
|
|
2445
|
+
if idx is None:
|
|
2446
|
+
return _invoke_function_over_columns("regexp_instr", str, regexp)
|
|
2447
|
+
else:
|
|
2448
|
+
if isinstance(idx, int):
|
|
2449
|
+
idx = lit(idx)
|
|
2450
|
+
return _invoke_function_over_columns("regexp_instr", str, regexp, idx)
|
|
2451
|
+
|
|
2452
|
+
|
|
2453
|
+
regexp_instr.__doc__ = pysparkfuncs.regexp_instr.__doc__
|
|
2454
|
+
|
|
2455
|
+
|
def initcap(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("initcap", col)


initcap.__doc__ = pysparkfuncs.initcap.__doc__


def soundex(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("soundex", col)


soundex.__doc__ = pysparkfuncs.soundex.__doc__


def length(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("length", col)


length.__doc__ = pysparkfuncs.length.__doc__


def octet_length(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("octet_length", col)


octet_length.__doc__ = pysparkfuncs.octet_length.__doc__


def bit_length(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bit_length", col)


bit_length.__doc__ = pysparkfuncs.bit_length.__doc__


def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column:
    return _invoke_function("translate", _to_col(srcCol), lit(matching), lit(replace))


translate.__doc__ = pysparkfuncs.translate.__doc__


def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
    if format is not None:
        return _invoke_function_over_columns("to_binary", col, format)
    else:
        return _invoke_function_over_columns("to_binary", col)


to_binary.__doc__ = pysparkfuncs.to_binary.__doc__


def to_char(col: "ColumnOrName", format: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("to_char", col, format)


to_char.__doc__ = pysparkfuncs.to_char.__doc__


def to_varchar(col: "ColumnOrName", format: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("to_varchar", col, format)


to_varchar.__doc__ = pysparkfuncs.to_varchar.__doc__


def to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("to_number", col, format)


to_number.__doc__ = pysparkfuncs.to_number.__doc__


def replace(
    src: "ColumnOrName", search: "ColumnOrName", replace: Optional["ColumnOrName"] = None
) -> Column:
    if replace is not None:
        return _invoke_function_over_columns("replace", src, search, replace)
    else:
        return _invoke_function_over_columns("replace", src, search)


replace.__doc__ = pysparkfuncs.replace.__doc__


def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("split_part", src, delimiter, partNum)


split_part.__doc__ = pysparkfuncs.split_part.__doc__


def substr(
    str: "ColumnOrName", pos: "ColumnOrName", len: Optional["ColumnOrName"] = None
) -> Column:
    if len is not None:
        return _invoke_function_over_columns("substr", str, pos, len)
    else:
        return _invoke_function_over_columns("substr", str, pos)


substr.__doc__ = pysparkfuncs.substr.__doc__


def parse_url(
    url: "ColumnOrName", partToExtract: "ColumnOrName", key: Optional["ColumnOrName"] = None
) -> Column:
    if key is not None:
        return _invoke_function_over_columns("parse_url", url, partToExtract, key)
    else:
        return _invoke_function_over_columns("parse_url", url, partToExtract)


parse_url.__doc__ = pysparkfuncs.parse_url.__doc__


def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column:
    return _invoke_function("printf", _to_col(format), *[_to_col(c) for c in cols])


printf.__doc__ = pysparkfuncs.printf.__doc__


def url_decode(str: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("url_decode", str)


url_decode.__doc__ = pysparkfuncs.url_decode.__doc__


def url_encode(str: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("url_encode", str)


url_encode.__doc__ = pysparkfuncs.url_encode.__doc__


def position(
    substr: "ColumnOrName", str: "ColumnOrName", start: Optional["ColumnOrName"] = None
) -> Column:
    if start is not None:
        return _invoke_function_over_columns("position", substr, str, start)
    else:
        return _invoke_function_over_columns("position", substr, str)


position.__doc__ = pysparkfuncs.position.__doc__


def endswith(str: "ColumnOrName", suffix: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("endswith", str, suffix)


endswith.__doc__ = pysparkfuncs.endswith.__doc__


def startswith(str: "ColumnOrName", prefix: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("startswith", str, prefix)


startswith.__doc__ = pysparkfuncs.startswith.__doc__


def char(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("char", col)


char.__doc__ = pysparkfuncs.char.__doc__


def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
    if format is not None:
        return _invoke_function_over_columns("try_to_binary", col, format)
    else:
        return _invoke_function_over_columns("try_to_binary", col)


try_to_binary.__doc__ = pysparkfuncs.try_to_binary.__doc__


def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("try_to_number", col, format)


try_to_number.__doc__ = pysparkfuncs.try_to_number.__doc__


def btrim(str: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column:
    if trim is not None:
        return _invoke_function_over_columns("btrim", str, trim)
    else:
        return _invoke_function_over_columns("btrim", str)


btrim.__doc__ = pysparkfuncs.btrim.__doc__


def char_length(str: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("char_length", str)


char_length.__doc__ = pysparkfuncs.char_length.__doc__


def character_length(str: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("character_length", str)


character_length.__doc__ = pysparkfuncs.character_length.__doc__


def contains(left: "ColumnOrName", right: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("contains", left, right)


contains.__doc__ = pysparkfuncs.contains.__doc__


def elt(*inputs: "ColumnOrName") -> Column:
    return _invoke_function("elt", *[_to_col(input) for input in inputs])


elt.__doc__ = pysparkfuncs.elt.__doc__


def find_in_set(str: "ColumnOrName", str_array: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("find_in_set", str, str_array)


find_in_set.__doc__ = pysparkfuncs.find_in_set.__doc__


def like(
    str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
) -> Column:
    if escapeChar is not None:
        return _invoke_function_over_columns("like", str, pattern, escapeChar)
    else:
        return _invoke_function_over_columns("like", str, pattern)


like.__doc__ = pysparkfuncs.like.__doc__


def ilike(
    str: "ColumnOrName", pattern: "ColumnOrName", escapeChar: Optional["Column"] = None
) -> Column:
    if escapeChar is not None:
        return _invoke_function_over_columns("ilike", str, pattern, escapeChar)
    else:
        return _invoke_function_over_columns("ilike", str, pattern)


ilike.__doc__ = pysparkfuncs.ilike.__doc__


def lcase(str: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("lcase", str)


lcase.__doc__ = pysparkfuncs.lcase.__doc__


def ucase(str: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ucase", str)


ucase.__doc__ = pysparkfuncs.ucase.__doc__


def left(str: "ColumnOrName", len: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("left", str, len)


left.__doc__ = pysparkfuncs.left.__doc__


def right(str: "ColumnOrName", len: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("right", str, len)


right.__doc__ = pysparkfuncs.right.__doc__


def mask(
    col: "ColumnOrName",
    upperChar: Optional["ColumnOrName"] = None,
    lowerChar: Optional["ColumnOrName"] = None,
    digitChar: Optional["ColumnOrName"] = None,
    otherChar: Optional["ColumnOrName"] = None,
) -> Column:
    _upperChar = lit("X") if upperChar is None else upperChar
    _lowerChar = lit("x") if lowerChar is None else lowerChar
    _digitChar = lit("n") if digitChar is None else digitChar
    _otherChar = lit(None) if otherChar is None else otherChar

    return _invoke_function_over_columns(
        "mask", col, _upperChar, _lowerChar, _digitChar, _otherChar
    )


mask.__doc__ = pysparkfuncs.mask.__doc__

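# Usage sketch (illustrative, not in the shipped file; assumes a session
# `spark`): mask's defaults replace uppercase with "X", lowercase with "x",
# and digits with "n", leaving other characters untouched.
#
#   spark.createDataFrame([("AbCd-1234",)], ["card"]) \
#       .select(mask("card")).show()   # XxXx-nnnn
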
# Date/Timestamp functions
# TODO(SPARK-41455): Resolve dtypes inconsistencies for:
# to_timestamp, from_utc_timestamp, to_utc_timestamp,
# timestamp_seconds, current_timestamp, date_trunc


def curdate() -> Column:
    return _invoke_function("curdate")


curdate.__doc__ = pysparkfuncs.curdate.__doc__


def current_date() -> Column:
    return _invoke_function("current_date")


current_date.__doc__ = pysparkfuncs.current_date.__doc__


def current_timestamp() -> Column:
    return _invoke_function("current_timestamp")


current_timestamp.__doc__ = pysparkfuncs.current_timestamp.__doc__


def now() -> Column:
    return _invoke_function("current_timestamp")


now.__doc__ = pysparkfuncs.now.__doc__


def current_timezone() -> Column:
    return _invoke_function("current_timezone")


current_timezone.__doc__ = pysparkfuncs.current_timezone.__doc__


def localtimestamp() -> Column:
    return _invoke_function("localtimestamp")


localtimestamp.__doc__ = pysparkfuncs.localtimestamp.__doc__


def date_format(date: "ColumnOrName", format: str) -> Column:
    return _invoke_function("date_format", _to_col(date), lit(format))


date_format.__doc__ = pysparkfuncs.date_format.__doc__


def year(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("year", col)


year.__doc__ = pysparkfuncs.year.__doc__


def quarter(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("quarter", col)


quarter.__doc__ = pysparkfuncs.quarter.__doc__


def month(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("month", col)


month.__doc__ = pysparkfuncs.month.__doc__


def dayofweek(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("dayofweek", col)


dayofweek.__doc__ = pysparkfuncs.dayofweek.__doc__


def dayofmonth(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("dayofmonth", col)


dayofmonth.__doc__ = pysparkfuncs.dayofmonth.__doc__


def day(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("day", col)


day.__doc__ = pysparkfuncs.day.__doc__


def dayofyear(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("dayofyear", col)


dayofyear.__doc__ = pysparkfuncs.dayofyear.__doc__


def hour(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("hour", col)


hour.__doc__ = pysparkfuncs.hour.__doc__


def minute(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("minute", col)


minute.__doc__ = pysparkfuncs.minute.__doc__


def second(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("second", col)


second.__doc__ = pysparkfuncs.second.__doc__


def weekofyear(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("weekofyear", col)


weekofyear.__doc__ = pysparkfuncs.weekofyear.__doc__


def weekday(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("weekday", col)


weekday.__doc__ = pysparkfuncs.weekday.__doc__


def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("extract", field, source)


extract.__doc__ = pysparkfuncs.extract.__doc__


def date_part(field: "ColumnOrName", source: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("date_part", field, source)


date_part.__doc__ = pysparkfuncs.date_part.__doc__


def datepart(field: "ColumnOrName", source: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("datepart", field, source)


datepart.__doc__ = pysparkfuncs.datepart.__doc__


def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("make_date", year, month, day)


make_date.__doc__ = pysparkfuncs.make_date.__doc__


def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
    days = lit(days) if isinstance(days, int) else days
    return _invoke_function_over_columns("date_add", start, days)


date_add.__doc__ = pysparkfuncs.date_add.__doc__


def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
    days = lit(days) if isinstance(days, int) else days
    return _invoke_function_over_columns("dateadd", start, days)


dateadd.__doc__ = pysparkfuncs.dateadd.__doc__


def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column:
    days = lit(days) if isinstance(days, int) else days
    return _invoke_function_over_columns("date_sub", start, days)


date_sub.__doc__ = pysparkfuncs.date_sub.__doc__


def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("datediff", end, start)


datediff.__doc__ = pysparkfuncs.datediff.__doc__


def date_diff(end: "ColumnOrName", start: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("date_diff", end, start)


date_diff.__doc__ = pysparkfuncs.date_diff.__doc__


def date_from_unix_date(days: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("date_from_unix_date", days)


date_from_unix_date.__doc__ = pysparkfuncs.date_from_unix_date.__doc__


def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column:
    months = lit(months) if isinstance(months, int) else months
    return _invoke_function_over_columns("add_months", start, months)


add_months.__doc__ = pysparkfuncs.add_months.__doc__


def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool = True) -> Column:
    return _invoke_function("months_between", _to_col(date1), _to_col(date2), lit(roundOff))


months_between.__doc__ = pysparkfuncs.months_between.__doc__

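# Usage sketch (illustrative, not in the shipped file): integer offsets are
# wrapped in lit() automatically by the shims above, so plain Python ints
# work alongside Columns.
#
#   df = spark.createDataFrame([("2024-01-31",)], ["d"])
#   df.select(date_add(to_date("d"), 1), add_months(to_date("d"), 1)).show()
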
def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column:
    if format is None:
        return _invoke_function_over_columns("to_date", col)
    else:
        return _invoke_function("to_date", _to_col(col), lit(format))


to_date.__doc__ = pysparkfuncs.to_date.__doc__


def unix_date(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("unix_date", col)


unix_date.__doc__ = pysparkfuncs.unix_date.__doc__


def unix_micros(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("unix_micros", col)


unix_micros.__doc__ = pysparkfuncs.unix_micros.__doc__


def unix_millis(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("unix_millis", col)


unix_millis.__doc__ = pysparkfuncs.unix_millis.__doc__


def unix_seconds(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("unix_seconds", col)


unix_seconds.__doc__ = pysparkfuncs.unix_seconds.__doc__


@overload
def to_timestamp(col: "ColumnOrName") -> Column:
    ...


@overload
def to_timestamp(col: "ColumnOrName", format: str) -> Column:
    ...


def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column:
    if format is None:
        return _invoke_function_over_columns("to_timestamp", col)
    else:
        return _invoke_function("to_timestamp", _to_col(col), lit(format))


to_timestamp.__doc__ = pysparkfuncs.to_timestamp.__doc__


def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> Column:
    if format is not None:
        return _invoke_function_over_columns("try_to_timestamp", col, format)
    else:
        return _invoke_function_over_columns("try_to_timestamp", col)


try_to_timestamp.__doc__ = pysparkfuncs.try_to_timestamp.__doc__

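# Usage sketch (illustrative): to_timestamp raises on unparseable input,
# while the try_ variant yields NULL instead of failing the query.
#
#   df = spark.createDataFrame([("not-a-date",)], ["s"])
#   df.select(try_to_timestamp("s")).show()   # NULL rather than an error
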
def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath", xml, path)


xpath.__doc__ = pysparkfuncs.xpath.__doc__


def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_boolean", xml, path)


xpath_boolean.__doc__ = pysparkfuncs.xpath_boolean.__doc__


def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_double", xml, path)


xpath_double.__doc__ = pysparkfuncs.xpath_double.__doc__


def xpath_number(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_number", xml, path)


xpath_number.__doc__ = pysparkfuncs.xpath_number.__doc__


def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_float", xml, path)


xpath_float.__doc__ = pysparkfuncs.xpath_float.__doc__


def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_int", xml, path)


xpath_int.__doc__ = pysparkfuncs.xpath_int.__doc__


def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_long", xml, path)


xpath_long.__doc__ = pysparkfuncs.xpath_long.__doc__


def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_short", xml, path)


xpath_short.__doc__ = pysparkfuncs.xpath_short.__doc__


def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xpath_string", xml, path)


xpath_string.__doc__ = pysparkfuncs.xpath_string.__doc__

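# Usage sketch (illustrative): all xpath_* variants share one shape and differ
# only in the type the matched value is coerced to.
#
#   df = spark.createDataFrame([("<a><b>1</b><b>2</b></a>",)], ["x"])
#   df.select(xpath("x", lit("a/b/text()"))).show()   # ["1", "2"]
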
def trunc(date: "ColumnOrName", format: str) -> Column:
    return _invoke_function("trunc", _to_col(date), lit(format))


trunc.__doc__ = pysparkfuncs.trunc.__doc__


def date_trunc(format: str, timestamp: "ColumnOrName") -> Column:
    return _invoke_function("date_trunc", lit(format), _to_col(timestamp))


date_trunc.__doc__ = pysparkfuncs.date_trunc.__doc__


def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column:
    return _invoke_function("next_day", _to_col(date), lit(dayOfWeek))


next_day.__doc__ = pysparkfuncs.next_day.__doc__


def last_day(date: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("last_day", date)


last_day.__doc__ = pysparkfuncs.last_day.__doc__


def from_unixtime(timestamp: "ColumnOrName", format: str = "yyyy-MM-dd HH:mm:ss") -> Column:
    return _invoke_function("from_unixtime", _to_col(timestamp), lit(format))


from_unixtime.__doc__ = pysparkfuncs.from_unixtime.__doc__


@overload
def unix_timestamp(timestamp: "ColumnOrName", format: str = ...) -> Column:
    ...


@overload
def unix_timestamp() -> Column:
    ...


def unix_timestamp(
    timestamp: Optional["ColumnOrName"] = None, format: str = "yyyy-MM-dd HH:mm:ss"
) -> Column:
    if timestamp is None:
        return _invoke_function("unix_timestamp")
    return _invoke_function("unix_timestamp", _to_col(timestamp), lit(format))


unix_timestamp.__doc__ = pysparkfuncs.unix_timestamp.__doc__


def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column:
    if isinstance(tz, str):
        tz = lit(tz)
    return _invoke_function_over_columns("from_utc_timestamp", timestamp, tz)


from_utc_timestamp.__doc__ = pysparkfuncs.from_utc_timestamp.__doc__


def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column:
    if isinstance(tz, str):
        tz = lit(tz)
    return _invoke_function_over_columns("to_utc_timestamp", timestamp, tz)


to_utc_timestamp.__doc__ = pysparkfuncs.to_utc_timestamp.__doc__


def timestamp_seconds(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("timestamp_seconds", col)


timestamp_seconds.__doc__ = pysparkfuncs.timestamp_seconds.__doc__


def timestamp_millis(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("timestamp_millis", col)


timestamp_millis.__doc__ = pysparkfuncs.timestamp_millis.__doc__


def timestamp_micros(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("timestamp_micros", col)


timestamp_micros.__doc__ = pysparkfuncs.timestamp_micros.__doc__

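# Usage sketch (illustrative): round-trip between epoch seconds and
# timestamps with the shims above.
#
#   df = spark.createDataFrame([(1704067200,)], ["epoch"])
#   df.select(timestamp_seconds("epoch"), from_unixtime("epoch")).show()
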
def window(
    timeColumn: "ColumnOrName",
    windowDuration: str,
    slideDuration: Optional[str] = None,
    startTime: Optional[str] = None,
) -> Column:
    if windowDuration is None or not isinstance(windowDuration, str):
        raise PySparkTypeError(
            error_class="NOT_STR",
            message_parameters={
                "arg_name": "windowDuration",
                "arg_type": type(windowDuration).__name__,
            },
        )
    if slideDuration is not None and not isinstance(slideDuration, str):
        raise PySparkTypeError(
            error_class="NOT_STR",
            message_parameters={
                "arg_name": "slideDuration",
                "arg_type": type(slideDuration).__name__,
            },
        )
    if startTime is not None and not isinstance(startTime, str):
        raise PySparkTypeError(
            error_class="NOT_STR",
            message_parameters={"arg_name": "startTime", "arg_type": type(startTime).__name__},
        )

    time_col = _to_col(timeColumn)

    if slideDuration is not None and startTime is not None:
        return _invoke_function(
            "window", time_col, lit(windowDuration), lit(slideDuration), lit(startTime)
        )
    elif slideDuration is not None:
        return _invoke_function("window", time_col, lit(windowDuration), lit(slideDuration))
    elif startTime is not None:
        return _invoke_function(
            "window", time_col, lit(windowDuration), lit(windowDuration), lit(startTime)
        )
    else:
        return _invoke_function("window", time_col, lit(windowDuration))


window.__doc__ = pysparkfuncs.window.__doc__


def window_time(
    windowColumn: "ColumnOrName",
) -> Column:
    return _invoke_function("window_time", _to_col(windowColumn))


window_time.__doc__ = pysparkfuncs.window_time.__doc__


def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) -> Column:
    if gapDuration is None or not isinstance(gapDuration, (Column, str)):
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_STR",
            message_parameters={"arg_name": "gapDuration", "arg_type": type(gapDuration).__name__},
        )

    time_col = _to_col(timeColumn)

    if isinstance(gapDuration, Column):
        return _invoke_function("session_window", time_col, gapDuration)
    else:
        return _invoke_function("session_window", time_col, lit(gapDuration))


session_window.__doc__ = pysparkfuncs.session_window.__doc__

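# Usage sketch (illustrative; `events` is a hypothetical DataFrame with a
# timestamp column `ts`): 10-minute tumbling windows by default; pass
# slideDuration for sliding windows, or use session_window for gap-based
# sessionization.
#
#   events.groupBy(window("ts", "10 minutes")).count()
#   events.groupBy(session_window("ts", "5 minutes")).count()
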
def to_unix_timestamp(
    timestamp: "ColumnOrName",
    format: Optional["ColumnOrName"] = None,
) -> Column:
    if format is not None:
        return _invoke_function_over_columns("to_unix_timestamp", timestamp, format)
    else:
        return _invoke_function_over_columns("to_unix_timestamp", timestamp)


to_unix_timestamp.__doc__ = pysparkfuncs.to_unix_timestamp.__doc__


def to_timestamp_ltz(
    timestamp: "ColumnOrName",
    format: Optional["ColumnOrName"] = None,
) -> Column:
    if format is not None:
        return _invoke_function_over_columns("to_timestamp_ltz", timestamp, format)
    else:
        return _invoke_function_over_columns("to_timestamp_ltz", timestamp)


to_timestamp_ltz.__doc__ = pysparkfuncs.to_timestamp_ltz.__doc__


def to_timestamp_ntz(
    timestamp: "ColumnOrName",
    format: Optional["ColumnOrName"] = None,
) -> Column:
    if format is not None:
        return _invoke_function_over_columns("to_timestamp_ntz", timestamp, format)
    else:
        return _invoke_function_over_columns("to_timestamp_ntz", timestamp)


to_timestamp_ntz.__doc__ = pysparkfuncs.to_timestamp_ntz.__doc__


# Partition Transformation Functions

def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column:
    if isinstance(numBuckets, int):
        _numBuckets = lit(numBuckets)
    elif isinstance(numBuckets, Column):
        _numBuckets = numBuckets
    else:
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_INT",
            message_parameters={
                "arg_name": "numBuckets",
                "arg_type": type(numBuckets).__name__,
            },
        )

    return _invoke_function("bucket", _numBuckets, _to_col(col))


bucket.__doc__ = pysparkfuncs.bucket.__doc__


def years(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("years", col)


years.__doc__ = pysparkfuncs.years.__doc__


def months(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("months", col)


months.__doc__ = pysparkfuncs.months.__doc__


def days(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("days", col)


days.__doc__ = pysparkfuncs.days.__doc__


def hours(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("hours", col)


hours.__doc__ = pysparkfuncs.hours.__doc__

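# Usage sketch (illustrative; `catalog.tbl` is a hypothetical table name):
# these transforms are intended for DataFrameWriterV2 partitioning clauses,
# not for ordinary projections.
#
#   df.writeTo("catalog.tbl").partitionedBy(bucket(16, "user_id"), days("ts")).create()
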
def convert_timezone(
    sourceTz: Optional[Column], targetTz: Column, sourceTs: "ColumnOrName"
) -> Column:
    if sourceTz is None:
        return _invoke_function_over_columns("convert_timezone", targetTz, sourceTs)
    else:
        return _invoke_function_over_columns("convert_timezone", sourceTz, targetTz, sourceTs)


convert_timezone.__doc__ = pysparkfuncs.convert_timezone.__doc__


def make_dt_interval(
    days: Optional["ColumnOrName"] = None,
    hours: Optional["ColumnOrName"] = None,
    mins: Optional["ColumnOrName"] = None,
    secs: Optional["ColumnOrName"] = None,
) -> Column:
    _days = lit(0) if days is None else _to_col(days)
    _hours = lit(0) if hours is None else _to_col(hours)
    _mins = lit(0) if mins is None else _to_col(mins)
    _secs = lit(decimal.Decimal(0)) if secs is None else _to_col(secs)

    return _invoke_function_over_columns("make_dt_interval", _days, _hours, _mins, _secs)


make_dt_interval.__doc__ = pysparkfuncs.make_dt_interval.__doc__


def make_interval(
    years: Optional["ColumnOrName"] = None,
    months: Optional["ColumnOrName"] = None,
    weeks: Optional["ColumnOrName"] = None,
    days: Optional["ColumnOrName"] = None,
    hours: Optional["ColumnOrName"] = None,
    mins: Optional["ColumnOrName"] = None,
    secs: Optional["ColumnOrName"] = None,
) -> Column:
    _years = lit(0) if years is None else _to_col(years)
    _months = lit(0) if months is None else _to_col(months)
    _weeks = lit(0) if weeks is None else _to_col(weeks)
    _days = lit(0) if days is None else _to_col(days)
    _hours = lit(0) if hours is None else _to_col(hours)
    _mins = lit(0) if mins is None else _to_col(mins)
    _secs = lit(decimal.Decimal(0)) if secs is None else _to_col(secs)

    return _invoke_function_over_columns(
        "make_interval", _years, _months, _weeks, _days, _hours, _mins, _secs
    )


make_interval.__doc__ = pysparkfuncs.make_interval.__doc__


def make_timestamp(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
    timezone: Optional["ColumnOrName"] = None,
) -> Column:
    if timezone is not None:
        return _invoke_function_over_columns(
            "make_timestamp", years, months, days, hours, mins, secs, timezone
        )
    else:
        return _invoke_function_over_columns(
            "make_timestamp", years, months, days, hours, mins, secs
        )


make_timestamp.__doc__ = pysparkfuncs.make_timestamp.__doc__


def make_timestamp_ltz(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
    timezone: Optional["ColumnOrName"] = None,
) -> Column:
    if timezone is not None:
        return _invoke_function_over_columns(
            "make_timestamp_ltz", years, months, days, hours, mins, secs, timezone
        )
    else:
        return _invoke_function_over_columns(
            "make_timestamp_ltz", years, months, days, hours, mins, secs
        )


make_timestamp_ltz.__doc__ = pysparkfuncs.make_timestamp_ltz.__doc__


def make_timestamp_ntz(
    years: "ColumnOrName",
    months: "ColumnOrName",
    days: "ColumnOrName",
    hours: "ColumnOrName",
    mins: "ColumnOrName",
    secs: "ColumnOrName",
) -> Column:
    return _invoke_function_over_columns(
        "make_timestamp_ntz", years, months, days, hours, mins, secs
    )


make_timestamp_ntz.__doc__ = pysparkfuncs.make_timestamp_ntz.__doc__


def make_ym_interval(
    years: Optional["ColumnOrName"] = None,
    months: Optional["ColumnOrName"] = None,
) -> Column:
    _years = lit(0) if years is None else _to_col(years)
    _months = lit(0) if months is None else _to_col(months)
    return _invoke_function_over_columns("make_ym_interval", _years, _months)


make_ym_interval.__doc__ = pysparkfuncs.make_ym_interval.__doc__

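# Usage sketch (illustrative): omitted interval components default to 0
# (seconds to Decimal(0)), so partial intervals are easy to build.
#
#   df.select(make_interval(years=lit(1), days=lit(10)))
#   df.select(make_ym_interval(months=lit(18)))
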
# Misc Functions


def current_catalog() -> Column:
    return _invoke_function("current_catalog")


current_catalog.__doc__ = pysparkfuncs.current_catalog.__doc__


def current_database() -> Column:
    return _invoke_function("current_database")


current_database.__doc__ = pysparkfuncs.current_database.__doc__


def current_schema() -> Column:
    return _invoke_function("current_schema")


current_schema.__doc__ = pysparkfuncs.current_schema.__doc__


def current_user() -> Column:
    return _invoke_function("current_user")


current_user.__doc__ = pysparkfuncs.current_user.__doc__


def user() -> Column:
    return _invoke_function("user")


user.__doc__ = pysparkfuncs.user.__doc__

def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None) -> Column:
    if errMsg is None:
        return _invoke_function_over_columns("assert_true", col)
    if not isinstance(errMsg, (str, Column)):
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_STR",
            message_parameters={"arg_name": "errMsg", "arg_type": type(errMsg).__name__},
        )
    _err_msg = lit(errMsg) if isinstance(errMsg, str) else _to_col(errMsg)
    return _invoke_function("assert_true", _to_col(col), _err_msg)


assert_true.__doc__ = pysparkfuncs.assert_true.__doc__


def raise_error(errMsg: Union[Column, str]) -> Column:
    if not isinstance(errMsg, (str, Column)):
        raise PySparkTypeError(
            error_class="NOT_COLUMN_OR_STR",
            message_parameters={"arg_name": "errMsg", "arg_type": type(errMsg).__name__},
        )
    _err_msg = lit(errMsg) if isinstance(errMsg, str) else _to_col(errMsg)
    return _invoke_function("raise_error", _err_msg)


raise_error.__doc__ = pysparkfuncs.raise_error.__doc__

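# Usage sketch (illustrative; assumes a column `qty`): assert_true fails the
# query when the predicate is false, raise_error always fails with the
# supplied message.
#
#   df.select(assert_true(col("qty") >= 0, "qty must be non-negative"))
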
def crc32(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("crc32", col)


crc32.__doc__ = pysparkfuncs.crc32.__doc__


def hash(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("hash", *cols)


hash.__doc__ = pysparkfuncs.hash.__doc__


def xxhash64(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("xxhash64", *cols)


xxhash64.__doc__ = pysparkfuncs.xxhash64.__doc__


def md5(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("md5", col)


md5.__doc__ = pysparkfuncs.md5.__doc__


def sha1(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("sha1", col)


sha1.__doc__ = pysparkfuncs.sha1.__doc__


def sha2(col: "ColumnOrName", numBits: int) -> Column:
    return _invoke_function("sha2", _to_col(col), lit(numBits))


sha2.__doc__ = pysparkfuncs.sha2.__doc__


def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] = None) -> Column:
    if lgConfigK is None:
        return _invoke_function_over_columns("hll_sketch_agg", col)
    else:
        _lgConfigK = lit(lgConfigK) if isinstance(lgConfigK, int) else lgConfigK
        return _invoke_function_over_columns("hll_sketch_agg", col, _lgConfigK)


hll_sketch_agg.__doc__ = pysparkfuncs.hll_sketch_agg.__doc__


def hll_union_agg(col: "ColumnOrName", allowDifferentLgConfigK: Optional[bool] = None) -> Column:
    if allowDifferentLgConfigK is None:
        return _invoke_function_over_columns("hll_union_agg", col)
    else:
        _allowDifferentLgConfigK = (
            lit(allowDifferentLgConfigK)
            if isinstance(allowDifferentLgConfigK, bool)
            else allowDifferentLgConfigK
        )
        return _invoke_function_over_columns("hll_union_agg", col, _allowDifferentLgConfigK)


hll_union_agg.__doc__ = pysparkfuncs.hll_union_agg.__doc__


def hll_sketch_estimate(col: "ColumnOrName") -> Column:
    return _invoke_function("hll_sketch_estimate", _to_col(col))


hll_sketch_estimate.__doc__ = pysparkfuncs.hll_sketch_estimate.__doc__


def hll_union(
    col1: "ColumnOrName", col2: "ColumnOrName", allowDifferentLgConfigK: Optional[bool] = None
) -> Column:
    if allowDifferentLgConfigK is not None:
        return _invoke_function(
            "hll_union", _to_col(col1), _to_col(col2), lit(allowDifferentLgConfigK)
        )
    else:
        return _invoke_function("hll_union", _to_col(col1), _to_col(col2))


hll_union.__doc__ = pysparkfuncs.hll_union.__doc__

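# Usage sketch (illustrative): approximate distinct counting with HLL
# sketches, aggregating then estimating in one pass.
#
#   df.agg(hll_sketch_estimate(hll_sketch_agg("user_id")).alias("approx_users"))
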
# Predicates Function


def ifnull(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("ifnull", col1, col2)


ifnull.__doc__ = pysparkfuncs.ifnull.__doc__


def isnotnull(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("isnotnull", col)


isnotnull.__doc__ = pysparkfuncs.isnotnull.__doc__


def equal_null(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("equal_null", col1, col2)


equal_null.__doc__ = pysparkfuncs.equal_null.__doc__


def nullif(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("nullif", col1, col2)


nullif.__doc__ = pysparkfuncs.nullif.__doc__


def nvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("nvl", col1, col2)


nvl.__doc__ = pysparkfuncs.nvl.__doc__


def nvl2(col1: "ColumnOrName", col2: "ColumnOrName", col3: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("nvl2", col1, col2, col3)


nvl2.__doc__ = pysparkfuncs.nvl2.__doc__

def aes_encrypt(
    input: "ColumnOrName",
    key: "ColumnOrName",
    mode: Optional["ColumnOrName"] = None,
    padding: Optional["ColumnOrName"] = None,
    iv: Optional["ColumnOrName"] = None,
    aad: Optional["ColumnOrName"] = None,
) -> Column:
    _mode = lit("GCM") if mode is None else _to_col(mode)
    _padding = lit("DEFAULT") if padding is None else _to_col(padding)
    _iv = lit("") if iv is None else _to_col(iv)
    _aad = lit("") if aad is None else _to_col(aad)

    return _invoke_function_over_columns("aes_encrypt", input, key, _mode, _padding, _iv, _aad)


aes_encrypt.__doc__ = pysparkfuncs.aes_encrypt.__doc__


def aes_decrypt(
    input: "ColumnOrName",
    key: "ColumnOrName",
    mode: Optional["ColumnOrName"] = None,
    padding: Optional["ColumnOrName"] = None,
    aad: Optional["ColumnOrName"] = None,
) -> Column:
    _mode = lit("GCM") if mode is None else _to_col(mode)
    _padding = lit("DEFAULT") if padding is None else _to_col(padding)
    _aad = lit("") if aad is None else _to_col(aad)

    return _invoke_function_over_columns("aes_decrypt", input, key, _mode, _padding, _aad)


aes_decrypt.__doc__ = pysparkfuncs.aes_decrypt.__doc__


def try_aes_decrypt(
    input: "ColumnOrName",
    key: "ColumnOrName",
    mode: Optional["ColumnOrName"] = None,
    padding: Optional["ColumnOrName"] = None,
    aad: Optional["ColumnOrName"] = None,
) -> Column:
    _mode = lit("GCM") if mode is None else _to_col(mode)
    _padding = lit("DEFAULT") if padding is None else _to_col(padding)
    _aad = lit("") if aad is None else _to_col(aad)

    return _invoke_function_over_columns("try_aes_decrypt", input, key, _mode, _padding, _aad)


try_aes_decrypt.__doc__ = pysparkfuncs.try_aes_decrypt.__doc__

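# Usage sketch (illustrative; the 16-character key is a made-up placeholder):
# GCM round-trip with the defaults above. AES keys must be 16, 24, or 32
# bytes; try_aes_decrypt returns NULL on failure instead of raising.
#
#   enc = aes_encrypt(col("payload"), lit("0000111122223333"))
#   df.select(try_aes_decrypt(enc, lit("0000111122223333")))
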
def sha(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("sha", col)


sha.__doc__ = pysparkfuncs.sha.__doc__


def input_file_block_length() -> Column:
    return _invoke_function_over_columns("input_file_block_length")


input_file_block_length.__doc__ = pysparkfuncs.input_file_block_length.__doc__


def input_file_block_start() -> Column:
    return _invoke_function_over_columns("input_file_block_start")


input_file_block_start.__doc__ = pysparkfuncs.input_file_block_start.__doc__


def reflect(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("reflect", *cols)


reflect.__doc__ = pysparkfuncs.reflect.__doc__


def java_method(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("java_method", *cols)


java_method.__doc__ = pysparkfuncs.java_method.__doc__


def version() -> Column:
    return _invoke_function_over_columns("version")


version.__doc__ = pysparkfuncs.version.__doc__


def typeof(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("typeof", col)


typeof.__doc__ = pysparkfuncs.typeof.__doc__


def stack(*cols: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("stack", *cols)


stack.__doc__ = pysparkfuncs.stack.__doc__


def bitmap_bit_position(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bitmap_bit_position", col)


bitmap_bit_position.__doc__ = pysparkfuncs.bitmap_bit_position.__doc__


def bitmap_bucket_number(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bitmap_bucket_number", col)


bitmap_bucket_number.__doc__ = pysparkfuncs.bitmap_bucket_number.__doc__


def bitmap_construct_agg(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bitmap_construct_agg", col)


bitmap_construct_agg.__doc__ = pysparkfuncs.bitmap_construct_agg.__doc__


def bitmap_count(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bitmap_count", col)


bitmap_count.__doc__ = pysparkfuncs.bitmap_count.__doc__


def bitmap_or_agg(col: "ColumnOrName") -> Column:
    return _invoke_function_over_columns("bitmap_or_agg", col)


bitmap_or_agg.__doc__ = pysparkfuncs.bitmap_or_agg.__doc__

# Call Functions


def call_udf(udfName: str, *cols: "ColumnOrName") -> Column:
    return _invoke_function(udfName, *[_to_col(c) for c in cols])


call_udf.__doc__ = pysparkfuncs.call_udf.__doc__


def unwrap_udt(col: "ColumnOrName") -> Column:
    return _invoke_function("unwrap_udt", _to_col(col))


unwrap_udt.__doc__ = pysparkfuncs.unwrap_udt.__doc__


def udf(
    f: Optional[Union[Callable[..., Any], "DataTypeOrString"]] = None,
    returnType: "DataTypeOrString" = StringType(),
    useArrow: Optional[bool] = None,
) -> Union["UserDefinedFunctionLike", Callable[[Callable[..., Any]], "UserDefinedFunctionLike"]]:
    if f is None or isinstance(f, (str, DataType)):
        # If DataType has been passed as a positional argument
        # for decorator use it as a returnType
        return_type = f or returnType
        return functools.partial(
            _create_py_udf,
            returnType=return_type,
            useArrow=useArrow,
        )
    else:
        return _create_py_udf(f=f, returnType=returnType, useArrow=useArrow)


udf.__doc__ = pysparkfuncs.udf.__doc__


def udtf(
    cls: Optional[Type] = None,
    *,
    returnType: Union[StructType, str],
    useArrow: Optional[bool] = None,
) -> Union["UserDefinedTableFunction", Callable[[Type], "UserDefinedTableFunction"]]:
    if cls is None:
        return functools.partial(_create_py_udtf, returnType=returnType, useArrow=useArrow)
    else:
        return _create_py_udtf(cls=cls, returnType=returnType, useArrow=useArrow)


udtf.__doc__ = pysparkfuncs.udtf.__doc__


def call_function(funcName: str, *cols: "ColumnOrName") -> Column:
    expressions = [_to_col(c)._expr for c in cols]
    return Column(CallFunction(funcName, expressions))


call_function.__doc__ = pysparkfuncs.call_function.__doc__

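# Usage sketch (illustrative; `shout` is a made-up UDF): udf works both as a
# direct wrapper and as a decorator, and call_function invokes any SQL
# function (built-in or registered UDF) by name.
#
#   @udf(returnType="string")
#   def shout(s):
#       return s.upper()
#
#   df.select(shout("name"), call_function("upper", col("name")))
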
def _test() -> None:
    import sys
    import os
    import doctest
    from pyspark.sql import SparkSession as PySparkSession
    import pyspark.sql.connect.functions

    globs = pyspark.sql.connect.functions.__dict__.copy()

    globs["spark"] = (
        PySparkSession.builder.appName("sql.connect.functions tests")
        .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]"))
        .getOrCreate()
    )

    (failure_count, test_count) = doctest.testmod(
        pyspark.sql.connect.functions,
        globs=globs,
        optionflags=doctest.ELLIPSIS
        | doctest.NORMALIZE_WHITESPACE
        | doctest.IGNORE_EXCEPTION_DETAIL,
    )

    globs["spark"].stop()

    if failure_count:
        sys.exit(-1)


if __name__ == "__main__":
    _test()