snowflake-ml-python 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/_complete.py +58 -3
- snowflake/ml/_internal/env_utils.py +64 -21
- snowflake/ml/_internal/file_utils.py +18 -4
- snowflake/ml/_internal/platform_capabilities.py +3 -0
- snowflake/ml/_internal/relax_version_strategy.py +16 -0
- snowflake/ml/_internal/telemetry.py +25 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +1 -1
- snowflake/ml/feature_store/feature_store.py +18 -0
- snowflake/ml/feature_store/feature_view.py +46 -1
- snowflake/ml/fileset/fileset.py +0 -1
- snowflake/ml/jobs/_utils/constants.py +31 -1
- snowflake/ml/jobs/_utils/payload_utils.py +232 -72
- snowflake/ml/jobs/_utils/spec_utils.py +78 -38
- snowflake/ml/jobs/decorators.py +8 -25
- snowflake/ml/jobs/job.py +4 -4
- snowflake/ml/jobs/manager.py +5 -0
- snowflake/ml/model/_client/model/model_version_impl.py +1 -1
- snowflake/ml/model/_client/ops/model_ops.py +107 -14
- snowflake/ml/model/_client/ops/service_ops.py +1 -1
- snowflake/ml/model/_client/service/model_deployment_spec.py +7 -3
- snowflake/ml/model/_client/sql/model_version.py +58 -0
- snowflake/ml/model/_client/sql/service.py +8 -2
- snowflake/ml/model/_model_composer/model_composer.py +50 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +0 -1
- snowflake/ml/model/_packager/model_env/model_env.py +49 -29
- snowflake/ml/model/_packager/model_handlers/_utils.py +8 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +44 -24
- snowflake/ml/model/_packager/model_handlers/keras.py +226 -0
- snowflake/ml/model/_packager/model_handlers/pytorch.py +51 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +25 -3
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +73 -21
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -72
- snowflake/ml/model/_packager/model_handlers/torchscript.py +49 -20
- snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
- snowflake/ml/model/_packager/model_handlers_migrator/pytorch_migrator_2023_12_01.py +20 -0
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2023_12_01.py +48 -0
- snowflake/ml/model/_packager/model_handlers_migrator/tensorflow_migrator_2025_01_01.py +19 -0
- snowflake/ml/model/_packager/model_handlers_migrator/torchscript_migrator_2023_12_01.py +20 -0
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +0 -1
- snowflake/ml/model/_packager/model_meta/model_meta.py +6 -2
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +16 -0
- snowflake/ml/model/_packager/model_packager.py +3 -5
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -2
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +8 -1
- snowflake/ml/model/_packager/model_task/model_task_utils.py +5 -1
- snowflake/ml/model/_signatures/builtins_handler.py +20 -9
- snowflake/ml/model/_signatures/core.py +54 -33
- snowflake/ml/model/_signatures/dmatrix_handler.py +98 -0
- snowflake/ml/model/_signatures/numpy_handler.py +12 -20
- snowflake/ml/model/_signatures/pandas_handler.py +28 -37
- snowflake/ml/model/_signatures/pytorch_handler.py +57 -41
- snowflake/ml/model/_signatures/snowpark_handler.py +0 -12
- snowflake/ml/model/_signatures/tensorflow_handler.py +61 -67
- snowflake/ml/model/_signatures/utils.py +120 -8
- snowflake/ml/model/custom_model.py +13 -4
- snowflake/ml/model/model_signature.py +39 -13
- snowflake/ml/model/type_hints.py +28 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +14 -1
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/pipeline/pipeline.py +18 -1
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +2 -2
- snowflake/ml/registry/_manager/model_manager.py +55 -7
- snowflake/ml/registry/registry.py +52 -4
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/METADATA +336 -27
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/RECORD +73 -66
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info/licenses}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.4.dist-info → snowflake_ml_python-1.8.0.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_signatures/tensorflow_handler.py
CHANGED
@@ -1,5 +1,5 @@
 from collections import abc
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, Literal, Optional, Sequence, Union

 import numpy as np
 import pandas as pd
@@ -11,12 +11,62 @@ from snowflake.ml._internal.exceptions import (
     exceptions as snowml_exceptions,
 )
 from snowflake.ml.model import type_hints as model_types
-from snowflake.ml.model._signatures import base_handler, core
+from snowflake.ml.model._signatures import base_handler, core, numpy_handler

 if TYPE_CHECKING:
     import tensorflow


+class TensorflowTensorHandler(base_handler.BaseDataHandler[Union["tensorflow.Tensor", "tensorflow.Variable"]]):
+    @staticmethod
+    def can_handle(
+        data: model_types.SupportedDataType,
+    ) -> TypeGuard[Union["tensorflow.Tensor", "tensorflow.Variable"]]:
+        return type_utils.LazyType("tensorflow.Tensor").isinstance(data) or type_utils.LazyType(
+            "tensorflow.Variable"
+        ).isinstance(data)
+
+    @staticmethod
+    def count(data: Union["tensorflow.Tensor", "tensorflow.Variable"]) -> int:
+        return numpy_handler.NumpyArrayHandler.count(data.numpy())
+
+    @staticmethod
+    def truncate(
+        data: Union["tensorflow.Tensor", "tensorflow.Variable"], length: int
+    ) -> Union["tensorflow.Tensor", "tensorflow.Variable"]:
+        return data[: min(TensorflowTensorHandler.count(data), length)]
+
+    @staticmethod
+    def validate(data: Union["tensorflow.Tensor", "tensorflow.Variable"]) -> None:
+        numpy_handler.NumpyArrayHandler.validate(data.numpy())
+
+    @staticmethod
+    def infer_signature(
+        data: Union["tensorflow.Tensor", "tensorflow.Variable"], role: Literal["input", "output"]
+    ) -> Sequence[core.BaseFeatureSpec]:
+        return numpy_handler.NumpyArrayHandler.infer_signature(data.numpy(), role=role)
+
+    @staticmethod
+    def convert_to_df(
+        data: Union["tensorflow.Tensor", "tensorflow.Variable"], ensure_serializable: bool = True
+    ) -> pd.DataFrame:
+        return numpy_handler.NumpyArrayHandler.convert_to_df(data.numpy(), ensure_serializable=ensure_serializable)
+
+    @staticmethod
+    def convert_from_df(
+        df: pd.DataFrame, features: Optional[Sequence[core.BaseFeatureSpec]] = None
+    ) -> Union["tensorflow.Tensor", "tensorflow.Variable"]:
+        import tensorflow as tf
+
+        if features is None:
+            if any(dtype == np.dtype("O") for dtype in df.dtypes):
+                return tf.convert_to_tensor(np.array(df.to_numpy().tolist()))
+            return tf.convert_to_tensor(df.to_numpy())
+
+        assert isinstance(features[0], core.FeatureSpec)
+        return tf.convert_to_tensor(np.array(df.to_numpy().tolist()), dtype=features[0]._dtype._numpy_type)
+
+
 class SeqOfTensorflowTensorHandler(
     base_handler.BaseDataHandler[Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]]
 ):
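The new `TensorflowTensorHandler` means a single `tf.Tensor` or `tf.Variable` is now accepted directly for signature inference, delegating to the NumPy handlers via `tensor.numpy()`. A minimal sketch of the user-facing effect (the example data and the call to the public `model_signature.infer_signature` API are ours, not part of the diff):

```python
import tensorflow as tf

from snowflake.ml.model import model_signature

input_data = tf.constant([[1.0, 2.0], [3.0, 4.0]])  # a batch of two rows
output_data = tf.constant([[0.1], [0.9]])

# Bare tensors no longer need to be wrapped in a one-element list.
sig = model_signature.infer_signature(input_data=input_data, output_data=output_data)
print(sig)
```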
@@ -28,35 +78,12 @@ class SeqOfTensorflowTensorHandler(
             return False
         if len(data) == 0:
             return False
-        if type_utils.LazyType("tensorflow.Tensor").isinstance(data[0]) or type_utils.LazyType(
-            "tensorflow.Variable"
-        ).isinstance(data[0]):
-            return all(
-                type_utils.LazyType("tensorflow.Tensor").isinstance(data_col)
-                or type_utils.LazyType("tensorflow.Variable").isinstance(data_col)
-                for data_col in data
-            )
-        return False
+
+        return all(TensorflowTensorHandler.can_handle(data_col) for data_col in data)

     @staticmethod
     def count(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> int:
-        import tensorflow as tf
-
-        rows = []
-        for data_col in data:
-            shapes = data_col.shape.as_list()
-            if data_col.shape == tf.TensorShape(None) or (not shapes) or (shapes[0] is None):
-                # Unknown shape array
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError("Data Validation Error: Unknown shape data is found."),
-                )
-            # Make mypy happy
-            assert isinstance(shapes[0], int)
-
-            rows.append(shapes[0])
-
-        return min(rows)
+        return min(TensorflowTensorHandler.count(data_col) for data_col in data)

     @staticmethod
     def truncate(
@@ -66,49 +93,14 @@ class SeqOfTensorflowTensorHandler(

     @staticmethod
     def validate(data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]]) -> None:
-        import tensorflow as tf
-
         for data_col in data:
-
-                # Unknown shape array
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError("Data Validation Error: Unknown shape data is found."),
-                )
-
-            if data_col.shape == tf.TensorShape([0]):
-                # Empty array
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError("Data Validation Error: Empty data is found."),
-                )
-
-            if data_col.shape == tf.TensorShape([1]) or data_col.shape == tf.TensorShape([]):
-                # scalar
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError("Data Validation Error: Scalar data is found."),
-                )
+            TensorflowTensorHandler.validate(data_col)

     @staticmethod
     def infer_signature(
         data: Sequence[Union["tensorflow.Tensor", "tensorflow.Variable"]], role: Literal["input", "output"]
     ) -> Sequence[core.BaseFeatureSpec]:
-
-        features: List[core.BaseFeatureSpec] = []
-        role_prefix = (
-            SeqOfTensorflowTensorHandler.INPUT_PREFIX if role == "input" else SeqOfTensorflowTensorHandler.OUTPUT_PREFIX
-        ) + "_"
-
-        for i, data_col in enumerate(data):
-            dtype = core.DataType.from_numpy_type(data_col.dtype.as_numpy_dtype)
-            ft_name = f"{role_prefix}{feature_prefix}{i}"
-            if len(data_col.shape) == 1:
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
-            else:
-                ft_shape = tuple(data_col.shape[1:])
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
-        return features
+        return numpy_handler.SeqOfNumpyArrayHandler.infer_signature([data_col.numpy() for data_col in data], role=role)

     @staticmethod
     def convert_to_df(
@@ -129,8 +121,10 @@ class SeqOfTensorflowTensorHandler(
         for feature in features:
             if isinstance(feature, core.FeatureGroupSpec):
                 raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.
-                    original_exception=NotImplementedError(
+                    error_code=error_codes.INVALID_DATA_TYPE,
+                    original_exception=NotImplementedError(
+                        "FeatureGroupSpec is not supported when converting to Tensorflow tensor."
+                    ),
                 )
             assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
             res.append(
snowflake/ml/model/_signatures/utils.py
CHANGED
@@ -135,7 +135,16 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
             core.FeatureSpec(name="inputs", dtype=core.DataType.STRING),
         ],
         outputs=[
-            core.
+            core.FeatureGroupSpec(
+                name="outputs",
+                specs=[
+                    core.FeatureSpec(name="sequence", dtype=core.DataType.STRING),
+                    core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                    core.FeatureSpec(name="token", dtype=core.DataType.INT64),
+                    core.FeatureSpec(name="token_str", dtype=core.DataType.STRING),
+                ],
+                shape=(-1,),
+            ),
         ],
     )
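The fill-mask outputs above are now declared as a variable-length `FeatureGroupSpec` rather than a flat feature. Building the same signature by hand would look roughly like this (the public import path is assumed; the spec contents mirror the hunk exactly):

```python
from snowflake.ml.model.model_signature import (
    DataType,
    FeatureGroupSpec,
    FeatureSpec,
    ModelSignature,
)

sig = ModelSignature(
    inputs=[FeatureSpec(name="inputs", dtype=DataType.STRING)],
    outputs=[
        FeatureGroupSpec(
            name="outputs",
            specs=[
                FeatureSpec(name="sequence", dtype=DataType.STRING),
                FeatureSpec(name="score", dtype=DataType.DOUBLE),
                FeatureSpec(name="token", dtype=DataType.INT64),
                FeatureSpec(name="token_str", dtype=DataType.STRING),
            ],
            shape=(-1,),  # a variable-length list of dicts per input row
        ),
    ],
)
```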
@@ -144,7 +153,18 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
     return core.ModelSignature(
         inputs=[core.FeatureSpec(name="inputs", dtype=core.DataType.STRING)],
         outputs=[
-            core.
+            core.FeatureGroupSpec(
+                name="outputs",
+                specs=[
+                    core.FeatureSpec(name="word", dtype=core.DataType.STRING),
+                    core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                    core.FeatureSpec(name="entity", dtype=core.DataType.STRING),
+                    core.FeatureSpec(name="index", dtype=core.DataType.INT64),
+                    core.FeatureSpec(name="start", dtype=core.DataType.INT64),
+                    core.FeatureSpec(name="end", dtype=core.DataType.INT64),
+                ],
+                shape=(-1,),
+            ),
         ],
     )
@@ -171,7 +191,16 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
             core.FeatureSpec(name="context", dtype=core.DataType.STRING),
         ],
         outputs=[
-            core.
+            core.FeatureGroupSpec(
+                name="answers",
+                specs=[
+                    core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                    core.FeatureSpec(name="start", dtype=core.DataType.INT64),
+                    core.FeatureSpec(name="end", dtype=core.DataType.INT64),
+                    core.FeatureSpec(name="answer", dtype=core.DataType.STRING),
+                ],
+                shape=(-1,),
+            ),
         ],
     )
@@ -216,17 +245,22 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
     return core.ModelSignature(
         inputs=[
             core.FeatureSpec(name="text", dtype=core.DataType.STRING),
-            core.FeatureSpec(name="text_pair", dtype=core.DataType.STRING),
         ],
         outputs=[
-            core.
+            core.FeatureGroupSpec(
+                name="labels",
+                specs=[
+                    core.FeatureSpec(name="label", dtype=core.DataType.STRING),
+                    core.FeatureSpec(name="score", dtype=core.DataType.DOUBLE),
+                ],
+                shape=(-1,),
+            ),
         ],
     )
     # Else, return a dict per input
     return core.ModelSignature(
         inputs=[
             core.FeatureSpec(name="text", dtype=core.DataType.STRING),
-            core.FeatureSpec(name="text_pair", dtype=core.DataType.STRING),
         ],
         outputs=[
             core.FeatureSpec(name="label", dtype=core.DataType.STRING),
@@ -243,9 +277,24 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])
     )
     # Always generate a list of dict per input
     return core.ModelSignature(
-        inputs=[
+        inputs=[
+            core.FeatureGroupSpec(
+                name="inputs",
+                specs=[
+                    core.FeatureSpec(name="role", dtype=core.DataType.STRING),
+                    core.FeatureSpec(name="content", dtype=core.DataType.STRING),
+                ],
+                shape=(-1,),
+            ),
+        ],
         outputs=[
-            core.
+            core.FeatureGroupSpec(
+                name="outputs",
+                specs=[
+                    core.FeatureSpec(name="generated_text", dtype=core.DataType.STRING),
+                ],
+                shape=(-1,),
+            )
         ],
     )
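With the new grouped inputs, a chat-style payload for text generation is a list of role/content dicts per row. A hypothetical input matching the signature above (the data itself is invented):

```python
import pandas as pd

input_df = pd.DataFrame(
    {
        "inputs": [
            [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "What is Snowpark?"},
            ]
        ]
    }
)
# Expected result shape: an "outputs" column holding a variable-length list of
# {"generated_text": ...} dicts per input row.
```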
@@ -300,3 +349,66 @@ def huggingface_pipeline_signature_auto_infer(task: str, params: Dict[str, Any])

 def series_dropna(series: pd.Series) -> pd.Series:
     return series.dropna(inplace=False).reset_index(drop=True).convert_dtypes()
+
+
+def infer_list(name: str, data: List[Any]) -> core.BaseFeatureSpec:
+    """Infer the feature specification from a list.
+
+    Args:
+        name: Feature name.
+        data: A list.
+
+    Raises:
+        SnowflakeMLException: ValueError: Raised when empty list is provided.
+
+    Returns:
+        A feature specification.
+    """
+    if not data:
+        raise snowml_exceptions.SnowflakeMLException(
+            error_code=error_codes.INVALID_DATA,
+            original_exception=ValueError("Data Validation Error: Empty list is found."),
+        )
+
+    if all(isinstance(value, dict) for value in data):
+        ft = infer_dict(name, data[0])
+        ft._name = name
+        ft._shape = (-1,)
+        return ft
+
+    arr = convert_list_to_ndarray(data)
+    arr_dtype = core.DataType.from_numpy_type(arr.dtype)
+
+    return core.FeatureSpec(name=name, dtype=arr_dtype, shape=arr.shape)
+
+
+def infer_dict(name: str, data: Dict[str, Any]) -> core.FeatureGroupSpec:
+    """Infer the feature specification from a dictionary.
+
+    Args:
+        name: Feature name.
+        data: A dictionary.
+
+    Raises:
+        SnowflakeMLException: ValueError: Raised when empty dictionary is provided.
+        SnowflakeMLException: ValueError: Raised when empty list is found in the dictionary.
+
+    Returns:
+        A feature group specification.
+    """
+    if not data:
+        raise snowml_exceptions.SnowflakeMLException(
+            error_code=error_codes.INVALID_DATA,
+            original_exception=ValueError("Data Validation Error: Empty dictionary is found."),
+        )
+
+    specs = []
+    for key, value in data.items():
+        if isinstance(value, list):
+            specs.append(infer_list(key, value))
+        elif isinstance(value, dict):
+            specs.append(infer_dict(key, value))
+        else:
+            specs.append(core.FeatureSpec(name=key, dtype=core.DataType.from_numpy_type(np.array(value).dtype)))
+
+    return core.FeatureGroupSpec(name=name, specs=specs)
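A rough illustration of how the two new helpers recurse (the module path is the one in this diff; the example payload is invented): dicts become `FeatureGroupSpec`s, lists of dicts become variable-length groups, and scalars become `FeatureSpec`s.

```python
from snowflake.ml.model._signatures import utils

spec = utils.infer_dict(
    "prediction",
    {
        "label": "positive",
        "score": 0.98,
        "tokens": [{"word": "great", "score": 0.91}],
    },
)
# -> FeatureGroupSpec("prediction") containing:
#      FeatureSpec("label", STRING), FeatureSpec("score", DOUBLE),
#      FeatureGroupSpec("tokens", shape=(-1,)) with "word"/"score" specs.
```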
snowflake/ml/model/custom_model.py
CHANGED
@@ -76,7 +76,7 @@ class ModelRef:
     def __getattr__(self, method_name: str) -> Any:
         if hasattr(self._model, method_name):
             return MethodRef(self, method_name)
-        raise
+        raise AttributeError(f"Method {method_name} not found in model {self._name}.")

     def __getstate__(self) -> Dict[str, Any]:
         state = self.__dict__.copy()
@@ -94,7 +94,16 @@ class ModelRef:

 class ModelContext:
     """
-    Context for a custom model
+    Context for a custom model storing paths to file artifacts and model object references.
+
+    Keyword argument values can be string file paths or supported in-memory models. Paths and model references
+    can be accessed with dictionary access methods in the custom model.
+
+    For example, in a custom model with `context=ModelContext(my_file='my_file.pkl', my_model=my_model)`,
+    the filepath and model reference can be accessed with `self.context['my_file']` and `self.context['my_model']`
+    in the inference and init methods.
+
+    The use of `artifacts` and `model_refs` arguments is deprecated. Set keyword arguments directly instead.

     Attributes:
         artifacts: A dictionary mapping the name of the artifact to its path.
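A minimal sketch of the keyword-argument usage the new docstring describes (the model object, file name, and pickle handling here are illustrative, not from the diff):

```python
import pickle

import pandas as pd

from snowflake.ml.model import custom_model


class MyModel(custom_model.CustomModel):
    def __init__(self, context: custom_model.ModelContext) -> None:
        super().__init__(context)
        # 'my_file' resolves to a file path, 'my_model' to the in-memory model.
        with open(self.context["my_file"], "rb") as f:
            self.aux = pickle.load(f)

    @custom_model.inference_api
    def predict(self, X: pd.DataFrame) -> pd.DataFrame:
        return pd.DataFrame({"output": self.context["my_model"].predict(X)})


ctx = custom_model.ModelContext(my_file="my_file.pkl", my_model=my_model)  # my_model defined elsewhere
m = MyModel(ctx)
```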
@@ -267,14 +276,14 @@ def _validate_predict_function(func: Callable[[model_types.CustomModelType, pd.D


 def inference_api(
-    func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]
+    func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
 ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
     func.__dict__["_is_inference_api"] = True
     return func


 def partitioned_inference_api(
-    func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]
+    func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
 ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
     func.__dict__["_is_inference_api"] = True
     func.__dict__["_is_partitioned_inference_api"] = True
snowflake/ml/model/model_signature.py
CHANGED
@@ -21,6 +21,7 @@ from typing_extensions import Never
 import snowflake.snowpark
 import snowflake.snowpark.functions as F
 import snowflake.snowpark.types as spt
+from snowflake.ml._internal import telemetry
 from snowflake.ml._internal.exceptions import (
     error_codes,
     exceptions as snowml_exceptions,
@@ -31,6 +32,7 @@ from snowflake.ml.model._signatures import (
     base_handler,
     builtins_handler as builtins_handler,
     core,
+    dmatrix_handler,
     numpy_handler,
     pandas_handler,
     pytorch_handler,
@@ -51,11 +53,17 @@ _LOCAL_DATA_HANDLERS: List[Type[base_handler.BaseDataHandler[Any]]] = [
     numpy_handler.NumpyArrayHandler,
     builtins_handler.ListOfBuiltinHandler,
     numpy_handler.SeqOfNumpyArrayHandler,
+    pytorch_handler.PyTorchTensorHandler,
     pytorch_handler.SeqOfPyTorchTensorHandler,
+    tensorflow_handler.TensorflowTensorHandler,
     tensorflow_handler.SeqOfTensorflowTensorHandler,
+    dmatrix_handler.XGBoostDMatrixHandler,
 ]
 _ALL_DATA_HANDLERS = _LOCAL_DATA_HANDLERS + [snowpark_handler.SnowparkDataFrameHandler]

+_TELEMETRY_PROJECT = "MLOps"
+_MODEL_TELEMETRY_SUBPROJECT = "ModelSignature"
+

 def _truncate_data(
     data: model_types.SupportedDataType,
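Registering `XGBoostDMatrixHandler` as a local data handler means an `xgboost.DMatrix` now counts as supported local data for signature inference. A sketch of what that plausibly enables (example data is ours; we have not run this against 1.8.0):

```python
import numpy as np
import xgboost as xgb

from snowflake.ml.model import model_signature

dmatrix = xgb.DMatrix(np.array([[1.0, 2.0], [3.0, 4.0]]))
sig = model_signature.infer_signature(
    input_data=dmatrix,
    output_data=np.array([0.3, 0.7]),
)
```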
@@ -214,7 +222,6 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
         strict: Enable strict validation, this includes value range based validation

     Raises:
-        SnowflakeMLException: NotImplementedError: FeatureGroupSpec is not supported.
         SnowflakeMLException: ValueError: Raised when a feature cannot be found.
         SnowflakeMLException: ValueError: Raised when feature is scalar but confront list element.
         SnowflakeMLException: ValueError: Raised when feature type is not aligned in list element.
@@ -232,7 +239,10 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
         except KeyError:
             raise snowml_exceptions.SnowflakeMLException(
                 error_code=error_codes.INVALID_DATA,
-                original_exception=ValueError(
+                original_exception=ValueError(
+                    f"Data Validation Error: feature {ft_name} does not exist in data. "
+                    f"Available columns are {data.columns}."
+                ),
             )

         if data_col.isnull().any():
@@ -240,10 +250,15 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
         df_col_dtype = data_col.dtype

         if isinstance(feature, core.FeatureGroupSpec):
-
-
-
-
+            if df_col_dtype != np.dtype("O"):
+                raise snowml_exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_DATA,
+                    original_exception=ValueError(
+                        f"Data Validation Error in feature group {ft_name}: "
+                        + f"It needs to be a dictionary or list of dictionary, but get {df_col_dtype}."
+                    ),
+                )
+            continue

         assert isinstance(feature, core.FeatureSpec)  # assert for mypy.
         ft_type = feature._dtype
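The removed `NotImplementedError` means grouped features are now validated instead of rejected: a pandas column backing a `FeatureGroupSpec` must hold dicts or lists of dicts (object dtype). An illustration of both sides of the new check (the data is invented):

```python
import pandas as pd

# Object-dtype column of lists of dicts: passes the FeatureGroupSpec check.
ok = pd.DataFrame({"outputs": [[{"label": "A", "score": 0.9}]]})

# Numeric column: raises SnowflakeMLException(INVALID_DATA) with
# "Data Validation Error in feature group outputs: It needs to be a
#  dictionary or list of dictionary, but get float64."
bad = pd.DataFrame({"outputs": [1.0, 2.0]})
```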
@@ -433,7 +448,6 @@ def _validate_snowpark_data(
         strict: Enable strict validation, this includes value range based validation.

     Raises:
-        SnowflakeMLException: NotImplementedError: FeatureGroupSpec is not supported.
         SnowflakeMLException: ValueError: Raised when confronting invalid feature.
         SnowflakeMLException: ValueError: Raised when a feature cannot be found.

@@ -463,10 +477,15 @@ def _validate_snowpark_data(
             if field.name == ft_name:
                 found = True
                 if isinstance(feature, core.FeatureGroupSpec):
-
-
-
-
+                    if not isinstance(field.datatype, (spt.ArrayType, spt.StructType, spt.VariantType)):
+                        errors[identifier_rule].append(
+                            ValueError(
+                                f"Data Validation Error in feature group {feature.name}: "
+                                + f"Feature expects {feature.as_snowpark_type()},"
+                                + f" while {field.name} has type {field.datatype}."
+                            ),
+                        )
+                    continue
                 assert isinstance(feature, core.FeatureSpec)  # mypy
                 ft_type = feature._dtype
                 field_data_type = field.datatype
@@ -640,11 +659,14 @@ def _validate_snowpark_type_feature(
     )


-def _convert_local_data_to_df(
+def _convert_local_data_to_df(
+    data: model_types.SupportedLocalDataType, ensure_serializable: bool = False
+) -> pd.DataFrame:
     """Convert local data to pandas DataFrame or Snowpark DataFrame

     Args:
         data: The provided data.
+        ensure_serializable: Ensure the data is serializable. Defaults to False.

     Raises:
         SnowflakeMLException: NotImplementedError: Raised when data cannot be handled by any data handler.
@@ -656,7 +678,7 @@ def _convert_local_data_to_df(data: model_types.SupportedLocalDataType) -> pd.Da
     for handler in _LOCAL_DATA_HANDLERS:
         if handler.can_handle(data):
             handler.validate(data)
-            df = handler.convert_to_df(data, ensure_serializable=
+            df = handler.convert_to_df(data, ensure_serializable=ensure_serializable)
             break
     if df is None:
         raise snowml_exceptions.SnowflakeMLException(
@@ -687,6 +709,10 @@ def _convert_and_validate_local_data(
     return df


+@telemetry.send_api_usage_telemetry(
+    project=_TELEMETRY_PROJECT,
+    subproject=_MODEL_TELEMETRY_SUBPROJECT,
+)
 def infer_signature(
     input_data: model_types.SupportedLocalDataType,
     output_data: model_types.SupportedLocalDataType,
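Combined with the recursive `_SupportedBuiltins` in `type_hints.py` below and the new `infer_list`/`infer_dict` helpers, nested builtin payloads can plausibly be fed to `infer_signature` directly. A speculative sketch (we have not verified this exact call against 1.8.0):

```python
from snowflake.ml.model import model_signature

# Chat-style builtins: each row is a dict (or list of dicts) of primitives.
sig = model_signature.infer_signature(
    input_data=[{"role": "user", "content": "Hello"}],
    output_data=[[{"generated_text": "Hi there!"}]],
)
```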
snowflake/ml/model/type_hints.py
CHANGED
@@ -7,6 +7,7 @@ from typing_extensions import NotRequired

 if TYPE_CHECKING:
     import catboost
+    import keras
     import lightgbm
     import mlflow
     import numpy as np
@@ -25,7 +26,15 @@ if TYPE_CHECKING:
     from snowflake.ml.modeling.framework import base  # noqa: F401


-_SupportedBuiltins = Union[
+_SupportedBuiltins = Union[
+    int,
+    float,
+    bool,
+    str,
+    bytes,
+    Dict[str, Union["_SupportedBuiltins", "_SupportedBuiltinsList"]],
+    "_SupportedBuiltinsList",
+]
 _SupportedNumpyDtype = Union[
     "np.int8",
     "np.int16",
@@ -47,7 +56,7 @@ _SupportedBuiltinsList = Sequence[_SupportedBuiltins]
 _SupportedArrayLike = Union[_SupportedNumpyArray, "torch.Tensor", "tensorflow.Tensor", "tensorflow.Variable"]

 SupportedLocalDataType = Union[
-    "pd.DataFrame",
+    "pd.DataFrame", _SupportedArrayLike, Sequence[_SupportedArrayLike], _SupportedBuiltinsList
 ]

 SupportedDataType = Union[SupportedLocalDataType, "snowflake.snowpark.DataFrame"]
@@ -68,6 +77,7 @@ SupportedRequireSignatureModelType = Union[
     "torch.nn.Module",
     "torch.jit.ScriptModule",
     "tensorflow.Module",
+    "keras.Model",
 ]

 SupportedNoSignatureRequirementsModelType = Union[
@@ -103,6 +113,7 @@ Here is all acceptable types of Snowflake native model packaging and its handler
 | transformers.Pipeline | huggingface_pipeline.py | _HuggingFacePipelineHandler |
 | huggingface_pipeline.HuggingFacePipelineModel | huggingface_pipeline.py | _HuggingFacePipelineHandler |
 | sentence_transformers.SentenceTransformer | sentence_transformers.py | _SentenceTransformerHandler |
+| keras.Model | keras.py | _KerasHandler |
 """

 SupportedModelHandlerType = Literal[
@@ -118,6 +129,7 @@ SupportedModelHandlerType = Literal[
     "tensorflow",
     "torchscript",
     "xgboost",
+    "keras",
 ]

 _ModelType = TypeVar("_ModelType", bound=SupportedModelType)
@@ -173,16 +185,19 @@ class SNOWModelSaveOptions(BaseModelSaveOption):
 class PyTorchSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
     cuda_version: NotRequired[str]
+    multiple_inputs: NotRequired[bool]


 class TorchScriptSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
     cuda_version: NotRequired[str]
+    multiple_inputs: NotRequired[bool]


 class TensorflowSaveOptions(BaseModelSaveOption):
     target_methods: NotRequired[Sequence[str]]
     cuda_version: NotRequired[str]
+    multiple_inputs: NotRequired[bool]


 class MLFlowSaveOptions(BaseModelSaveOption):
@@ -202,6 +217,11 @@ class SentenceTransformersSaveOptions(BaseModelSaveOption):
     batch_size: NotRequired[int]


+class KerasSaveOptions(BaseModelSaveOption):
+    target_methods: NotRequired[Sequence[str]]
+    cuda_version: NotRequired[str]
+
+
 ModelSaveOption = Union[
     BaseModelSaveOption,
     CatBoostModelSaveOptions,
@@ -216,6 +236,7 @@ ModelSaveOption = Union[
     MLFlowSaveOptions,
     HuggingFaceSaveOptions,
     SentenceTransformersSaveOptions,
+    KerasSaveOptions,
 ]


@@ -276,6 +297,10 @@ class SentenceTransformersLoadOptions(BaseModelLoadOption):
     device: NotRequired[str]


+class KerasLoadOptions(BaseModelLoadOption):
+    use_gpu: NotRequired[bool]
+
+
 ModelLoadOption = Union[
     BaseModelLoadOption,
     CatBoostModelLoadOptions,
@@ -290,6 +315,7 @@ ModelLoadOption = Union[
     MLFlowLoadOptions,
     HuggingFaceLoadOptions,
     SentenceTransformersLoadOptions,
+    KerasLoadOptions,
 ]
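Keras models become a first-class handler type in this release (see the new `keras.py` handler in the file list). A sketch of logging one through the registry (the session, sample data, and model are placeholders; the `options` keys follow `KerasSaveOptions` above):

```python
import keras

from snowflake.ml.registry import Registry

model = keras.Sequential([keras.layers.Input(shape=(4,)), keras.layers.Dense(1)])

reg = Registry(session=session)  # `session` is an existing Snowpark session
mv = reg.log_model(
    model,
    model_name="my_keras_model",
    version_name="v1",
    sample_input_data=sample_df,  # placeholder DataFrame used for signature inference
    options={"target_methods": ["predict"]},
)
```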
|
snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py
CHANGED
@@ -199,8 +199,21 @@ class SnowparkTransformHandlers:
         if expected_output_cols_type == "":
             expected_output_cols_type = "string"
         assert expected_output_cols_type is not None
+
+        # If there is only one output column, the UDF might have generate complex objects (lists, dicts).
+        # In such cases, we attempt to not do explicit cast. (Example: PolynomialFeatures.transform)
+        try_parse_object = len(expected_output_cols) == 1 and expected_output_cols_type != "string"
         for output_feature in expected_output_cols:
-
+            column_expr = F.col(INTERMEDIATE_OBJ_NAME)[output_feature]
+
+            if try_parse_object and df_res.count() > 0:
+                # Only do type casting if it's not an array
+                if not df_res.select(F.is_array(column_expr)).first()[0]:
+                    column_expr = column_expr.astype(expected_output_cols_type)
+            else:
+                column_expr = column_expr.astype(expected_output_cols_type)
+
+            output_cols.append(column_expr)
             output_col_names.append(identifier.get_inferred_name(output_feature))

         # Extract output from INTERMEDIATE_OBJ_NAME and drop that column