snowpark-connect 0.33.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (39)
  1. snowflake/snowpark_connect/column_name_handler.py +42 -56
  2. snowflake/snowpark_connect/config.py +9 -0
  3. snowflake/snowpark_connect/expression/literal.py +12 -12
  4. snowflake/snowpark_connect/expression/map_sql_expression.py +6 -0
  5. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +147 -63
  6. snowflake/snowpark_connect/expression/map_unresolved_function.py +31 -28
  7. snowflake/snowpark_connect/relation/map_aggregate.py +156 -255
  8. snowflake/snowpark_connect/relation/map_column_ops.py +14 -0
  9. snowflake/snowpark_connect/relation/map_join.py +364 -234
  10. snowflake/snowpark_connect/relation/map_sql.py +309 -150
  11. snowflake/snowpark_connect/relation/read/map_read.py +9 -1
  12. snowflake/snowpark_connect/relation/read/map_read_csv.py +19 -2
  13. snowflake/snowpark_connect/relation/read/map_read_json.py +3 -0
  14. snowflake/snowpark_connect/relation/read/map_read_parquet.py +3 -0
  15. snowflake/snowpark_connect/relation/read/map_read_text.py +4 -0
  16. snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
  17. snowflake/snowpark_connect/relation/read/utils.py +41 -0
  18. snowflake/snowpark_connect/relation/utils.py +4 -2
  19. snowflake/snowpark_connect/relation/write/map_write.py +65 -17
  20. snowflake/snowpark_connect/utils/context.py +0 -14
  21. snowflake/snowpark_connect/utils/expression_transformer.py +163 -0
  22. snowflake/snowpark_connect/utils/session.py +0 -4
  23. snowflake/snowpark_connect/utils/udf_helper.py +1 -0
  24. snowflake/snowpark_connect/utils/udtf_helper.py +3 -0
  25. snowflake/snowpark_connect/version.py +1 -1
  26. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/METADATA +2 -2
  27. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/RECORD +35 -38
  28. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +0 -16
  29. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +0 -1281
  30. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +0 -203
  31. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +0 -202
  32. {snowpark_connect-0.33.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-connect +0 -0
  33. {snowpark_connect-0.33.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-session +0 -0
  34. {snowpark_connect-0.33.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-submit +0 -0
  35. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/WHEEL +0 -0
  36. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE-binary +0 -0
  37. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  38. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/NOTICE-binary +0 -0
  39. {snowpark_connect-0.33.0.dist-info → snowpark_connect-1.0.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py
@@ -1,203 +0,0 @@
- #
- # Licensed to the Apache Software Foundation (ASF) under one or more
- # contributor license agreements. See the NOTICE file distributed with
- # this work for additional information regarding copyright ownership.
- # The ASF licenses this file to You under the Apache License, Version 2.0
- # (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- """
- Additional Spark functions used in pandas-on-Spark.
- """
- from typing import Union
-
- from pyspark import SparkContext
- import pyspark.sql.functions as F
- from pyspark.sql.column import Column
-
- # For supporting Spark Connect
- from pyspark.sql.utils import is_remote
-
-
- def product(col: Column, dropna: bool) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_product",
-             col,  # type: ignore[arg-type]
-             lit(dropna),
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasProduct(col._jc, dropna))
-
-
- def stddev(col: Column, ddof: int) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_stddev",
-             col,  # type: ignore[arg-type]
-             lit(ddof),
-         )
-
-     else:
-
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasStddev(col._jc, ddof))
-
-
- def var(col: Column, ddof: int) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_var",
-             col,  # type: ignore[arg-type]
-             lit(ddof),
-         )
-
-     else:
-
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasVariance(col._jc, ddof))
-
-
- def skew(col: Column) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_skew",
-             col,  # type: ignore[arg-type]
-         )
-
-     else:
-
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasSkewness(col._jc))
-
-
- def kurt(col: Column) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_kurt",
-             col,  # type: ignore[arg-type]
-         )
-
-     else:
-
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasKurtosis(col._jc))
-
-
- def mode(col: Column, dropna: bool) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_mode",
-             col,  # type: ignore[arg-type]
-             lit(dropna),
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasMode(col._jc, dropna))
-
-
- def covar(col1: Column, col2: Column, ddof: int) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "pandas_covar",
-             col1,  # type: ignore[arg-type]
-             col2,  # type: ignore[arg-type]
-             lit(ddof),
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.pandasCovar(col1._jc, col2._jc, ddof))
-
-
- def repeat(col: Column, n: Union[int, Column]) -> Column:
-     """
-     Repeats a string column n times, and returns it as a new string column.
-     """
-     _n = F.lit(n) if isinstance(n, int) else n
-     return F.call_udf("repeat", col, _n)
-
-
- def ewm(col: Column, alpha: float, ignore_na: bool) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "ewm",
-             col,  # type: ignore[arg-type]
-             lit(alpha),
-             lit(ignore_na),
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.ewm(col._jc, alpha, ignore_na))
-
-
- def last_non_null(col: Column) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "last_non_null",
-             col,  # type: ignore[arg-type]
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.lastNonNull(col._jc))
-
-
- def null_index(col: Column) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "null_index",
-             col,  # type: ignore[arg-type]
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.nullIndex(col._jc))
-
-
- def timestampdiff(unit: str, start: Column, end: Column) -> Column:
-     if is_remote():
-         from pyspark.sql.connect.functions import _invoke_function_over_columns, lit
-
-         return _invoke_function_over_columns(  # type: ignore[return-value]
-             "timestampdiff",
-             lit(unit),
-             start,  # type: ignore[arg-type]
-             end,  # type: ignore[arg-type]
-         )
-
-     else:
-         sc = SparkContext._active_spark_context
-         return Column(sc._jvm.PythonSQLUtils.timestampDiff(unit, start._jc, end._jc))
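
Every helper in the deleted functions.py above follows the same dispatch shape: under Spark Connect (is_remote()), it builds an unresolved function call with _invoke_function_over_columns; otherwise it calls the JVM-backed PythonSQLUtils entry point directly. The hedged sketch below, which is not part of the diff, shows how such helpers are typically consumed from a stock PySpark installation where pyspark.pandas.spark.functions is still present; the session, data, and column names are illustrative only.

# Hedged usage sketch (assumption: stock PySpark with pyspark.pandas.spark.functions available).
from pyspark.sql import SparkSession
from pyspark.pandas.spark import functions as SF

spark = SparkSession.builder.master("local[1]").getOrCreate()
sdf = spark.createDataFrame([(1.0,), (2.0,), (4.0,)], ["value"])

# stddev/skew return plain Column expressions, so they compose with select/agg
# like built-in functions; internally each routes through either the Spark
# Connect plan builder or sc._jvm.PythonSQLUtils, depending on is_remote().
sdf.select(
    SF.stddev(sdf["value"], ddof=1).alias("stddev"),
    SF.skew(sdf["value"]).alias("skew"),
).show()
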
snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py
@@ -1,202 +0,0 @@
- #
- # Licensed to the Apache Software Foundation (ASF) under one or more
- # contributor license agreements. See the NOTICE file distributed with
- # this work for additional information regarding copyright ownership.
- # The ASF licenses this file to You under the Apache License, Version 2.0
- # (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- """
- Helpers and utilities to deal with PySpark instances
- """
- from typing import overload
-
- from pyspark.sql.types import DecimalType, StructType, MapType, ArrayType, StructField, DataType
-
-
- @overload
- def as_nullable_spark_type(dt: StructType) -> StructType:
-     ...
-
-
- @overload
- def as_nullable_spark_type(dt: ArrayType) -> ArrayType:
-     ...
-
-
- @overload
- def as_nullable_spark_type(dt: MapType) -> MapType:
-     ...
-
-
- @overload
- def as_nullable_spark_type(dt: DataType) -> DataType:
-     ...
-
-
- def as_nullable_spark_type(dt: DataType) -> DataType:
-     """
-     Returns a nullable schema or data types.
-
-     Examples
-     --------
-     >>> from pyspark.sql.types import *
-     >>> as_nullable_spark_type(StructType([
-     ...     StructField("A", IntegerType(), True),
-     ...     StructField("B", FloatType(), False)]))  # doctest: +NORMALIZE_WHITESPACE
-     StructType([StructField('A', IntegerType(), True), StructField('B', FloatType(), True)])
-
-     >>> as_nullable_spark_type(StructType([
-     ...     StructField("A",
-     ...         StructType([
-     ...             StructField('a',
-     ...                 MapType(IntegerType(),
-     ...                 ArrayType(IntegerType(), False), False), False),
-     ...             StructField('b', StringType(), True)])),
-     ...     StructField("B", FloatType(), False)]))  # doctest: +NORMALIZE_WHITESPACE
-     StructType([StructField('A',
-         StructType([StructField('a',
-             MapType(IntegerType(),
-                 ArrayType(IntegerType(), True), True), True),
-             StructField('b', StringType(), True)]), True),
-         StructField('B', FloatType(), True)])
-     """
-     if isinstance(dt, StructType):
-         new_fields = []
-         for field in dt.fields:
-             new_fields.append(
-                 StructField(
-                     field.name,
-                     as_nullable_spark_type(field.dataType),
-                     nullable=True,
-                     metadata=field.metadata,
-                 )
-             )
-         return StructType(new_fields)
-     elif isinstance(dt, ArrayType):
-         return ArrayType(as_nullable_spark_type(dt.elementType), containsNull=True)
-     elif isinstance(dt, MapType):
-         return MapType(
-             as_nullable_spark_type(dt.keyType),
-             as_nullable_spark_type(dt.valueType),
-             valueContainsNull=True,
-         )
-     else:
-         return dt
-
-
- @overload
- def force_decimal_precision_scale(
-     dt: StructType, *, precision: int = ..., scale: int = ...
- ) -> StructType:
-     ...
-
-
- @overload
- def force_decimal_precision_scale(
-     dt: ArrayType, *, precision: int = ..., scale: int = ...
- ) -> ArrayType:
-     ...
-
-
- @overload
- def force_decimal_precision_scale(
-     dt: MapType, *, precision: int = ..., scale: int = ...
- ) -> MapType:
-     ...
-
-
- @overload
- def force_decimal_precision_scale(
-     dt: DataType, *, precision: int = ..., scale: int = ...
- ) -> DataType:
-     ...
-
-
- def force_decimal_precision_scale(
-     dt: DataType, *, precision: int = 38, scale: int = 18
- ) -> DataType:
-     """
-     Returns a data type with a fixed decimal type.
-
-     The precision and scale of the decimal type are fixed with the given values.
-
-     Examples
-     --------
-     >>> from pyspark.sql.types import *
-     >>> force_decimal_precision_scale(StructType([
-     ...     StructField("A", DecimalType(10, 0), True),
-     ...     StructField("B", DecimalType(14, 7), False)]))  # doctest: +NORMALIZE_WHITESPACE
-     StructType([StructField('A', DecimalType(38,18), True),
-         StructField('B', DecimalType(38,18), False)])
-
-     >>> force_decimal_precision_scale(StructType([
-     ...     StructField("A",
-     ...         StructType([
-     ...             StructField('a',
-     ...                 MapType(DecimalType(5, 0),
-     ...                 ArrayType(DecimalType(20, 0), False), False), False),
-     ...             StructField('b', StringType(), True)])),
-     ...     StructField("B", DecimalType(30, 15), False)]),
-     ...     precision=30, scale=15)  # doctest: +NORMALIZE_WHITESPACE
-     StructType([StructField('A',
-         StructType([StructField('a',
-             MapType(DecimalType(30,15),
-                 ArrayType(DecimalType(30,15), False), False), False),
-             StructField('b', StringType(), True)]), True),
-         StructField('B', DecimalType(30,15), False)])
-     """
-     if isinstance(dt, StructType):
-         new_fields = []
-         for field in dt.fields:
-             new_fields.append(
-                 StructField(
-                     field.name,
-                     force_decimal_precision_scale(field.dataType, precision=precision, scale=scale),
-                     nullable=field.nullable,
-                     metadata=field.metadata,
-                 )
-             )
-         return StructType(new_fields)
-     elif isinstance(dt, ArrayType):
-         return ArrayType(
-             force_decimal_precision_scale(dt.elementType, precision=precision, scale=scale),
-             containsNull=dt.containsNull,
-         )
-     elif isinstance(dt, MapType):
-         return MapType(
-             force_decimal_precision_scale(dt.keyType, precision=precision, scale=scale),
-             force_decimal_precision_scale(dt.valueType, precision=precision, scale=scale),
-             valueContainsNull=dt.valueContainsNull,
-         )
-     elif isinstance(dt, DecimalType):
-         return DecimalType(precision=precision, scale=scale)
-     else:
-         return dt
-
-
- def _test() -> None:
-     import doctest
-     import sys
-     import pyspark.pandas.spark.utils
-
-     globs = pyspark.pandas.spark.utils.__dict__.copy()
-     (failure_count, test_count) = doctest.testmod(
-         pyspark.pandas.spark.utils,
-         globs=globs,
-         optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
-     )
-     if failure_count:
-         sys.exit(-1)
-
-
- if __name__ == "__main__":
-     _test()
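
The two utilities in the deleted utils.py are pure schema transforms, so they can be exercised without a running session. Below is a minimal hedged sketch, not part of the diff, assuming a stock PySpark installation where pyspark.pandas.spark.utils is still importable; the schema and field names are illustrative only.

# Hedged usage sketch (assumption: stock PySpark with pyspark.pandas.spark.utils available).
from pyspark.sql.types import ArrayType, DecimalType, LongType, StructField, StructType
from pyspark.pandas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale

schema = StructType([
    StructField("id", LongType(), nullable=False),
    StructField("amounts", ArrayType(DecimalType(10, 2), containsNull=False), nullable=False),
])

# as_nullable_spark_type: every field and nested element becomes nullable.
nullable_schema = as_nullable_spark_type(schema)
print(nullable_schema["amounts"].nullable)                # True
print(nullable_schema["amounts"].dataType.containsNull)   # True

# force_decimal_precision_scale: every DecimalType is rewritten to the default DecimalType(38, 18).
widened = force_decimal_precision_scale(schema)
print(widened["amounts"].dataType.elementType)            # DecimalType(38,18)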