PyPI - snowflake-ml-python - Versions diffs - 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

snowflake-ml-python 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

snowflake/ml/model/_signatures/core.py CHANGED Viewed

@@ -199,9 +199,16 @@ class DataType(Enum):
 class BaseFeatureSpec(ABC):
     """Abstract Class for specification of a feature."""
-    def __init__(self, name: str) -> None:
+    def __init__(self, name: str, shape: Optional[Tuple[int, ...]]) -> None:
         self._name = name
+        if shape and not isinstance(shape, tuple):
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_TYPE,
+                original_exception=TypeError("Shape should be a tuple if presented."),
+            )
+        self._shape = shape
     @final
     @property
     def name(self) -> str:
@@ -213,6 +220,11 @@ class BaseFeatureSpec(ABC):
         """Convert to corresponding Snowpark Type."""
         pass
+    @abstractmethod
+    def as_dtype(self, force_numpy_dtype: bool = False) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
+        """Convert to corresponding local Type."""
+        pass
     @abstractmethod
     def to_dict(self) -> Dict[str, Any]:
         """Serialization"""
@@ -256,7 +268,7 @@ class FeatureSpec(BaseFeatureSpec):
             SnowflakeMLException: TypeError: When the dtype input type is incorrect.
             SnowflakeMLException: TypeError: When the shape input type is incorrect.
         """
-        super().__init__(name=name)
+        super().__init__(name=name, shape=shape)
         if not isinstance(dtype, DataType):
             raise snowml_exceptions.SnowflakeMLException(
@@ -265,13 +277,6 @@ class FeatureSpec(BaseFeatureSpec):
             )
         self._dtype = dtype
-        if shape and not isinstance(shape, tuple):
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_TYPE,
-                original_exception=TypeError("Shape should be a tuple if presented."),
-            )
-        self._shape = shape
         self._nullable = nullable
     def as_snowpark_type(self) -> spt.DataType:
@@ -282,7 +287,7 @@ class FeatureSpec(BaseFeatureSpec):
             result_type = spt.ArrayType(result_type)
         return result_type
-    def as_dtype(self) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
+    def as_dtype(self, force_numpy_dtype: bool = False) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
         """Convert to corresponding local Type."""
         if not self._shape:
@@ -291,7 +296,7 @@ class FeatureSpec(BaseFeatureSpec):
                 return self._dtype._value
             np_type = self._dtype._numpy_type
-            if self._nullable:
+            if self._nullable and not force_numpy_dtype:
                 np_to_pd_dtype_mapping = {
                     np.int8: pd.Int8Dtype(),
                     np.int16: pd.Int16Dtype(),
@@ -386,15 +391,23 @@ class FeatureSpec(BaseFeatureSpec):
 class FeatureGroupSpec(BaseFeatureSpec):
     """Specification of a group of features in Snowflake native model packaging."""
-    def __init__(self, name: str, specs: List[FeatureSpec]) -> None:
+    def __init__(self, name: str, specs: List[BaseFeatureSpec], shape: Optional[Tuple[int, ...]] = None) -> None:
         """Initialize a feature group.
         Args:
             name: Name of the feature group.
             specs: A list of feature specifications that composes the group. All children feature specs have to have
                 name. And all of them should have the same type.
+            shape: Used to represent scalar feature, 1-d feature list,
+                or n-d tensor. Use -1 to represent variable length. Defaults to None.
+                Examples:
+                    - None: scalar
+                    - (2,): 1d list with a fixed length of 2.
+                    - (-1,): 1d list with variable length, used for ragged tensor representation.
+                    - (d1, d2, d3): 3d tensor.
         """
-        super().__init__(name=name)
+        super().__init__(name=name, shape=shape)
         self._specs = specs
         self._validate()
@@ -409,47 +422,52 @@ class FeatureGroupSpec(BaseFeatureSpec):
                 error_code=error_codes.INVALID_ARGUMENT,
                 original_exception=ValueError("All children feature specs have to have name."),
             )
-        if not (all(s._shape is None for s in self._specs) or all(s._shape is not None for s in self._specs)):
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError("All children feature specs have to have same shape."),
-            )
-        first_type = self._specs[0]._dtype
-        if not all(s._dtype == first_type for s in self._specs):
-            raise snowml_exceptions.SnowflakeMLException(
-                error_code=error_codes.INVALID_ARGUMENT,
-                original_exception=ValueError("All children feature specs have to have same type."),
-            )
     def as_snowpark_type(self) -> spt.DataType:
-        first_type = self._specs[0].as_snowpark_type()
-        return spt.MapType(spt.StringType(), first_type)
+        spt_type = spt.StructType(
+            fields=[
+                spt.StructField(
+                    s._name, datatype=s.as_snowpark_type(), nullable=s._nullable if isinstance(s, FeatureSpec) else True
+                )
+                for s in self._specs
+            ]
+        )
+        if not self._shape:
+            return spt_type
+        return spt.ArrayType(spt_type)
     def __eq__(self, other: object) -> bool:
         if isinstance(other, FeatureGroupSpec):
-            return self._specs == other._specs
+            return self._name == other._name and self._specs == other._specs and self._shape == other._shape
         else:
             return False
     def __repr__(self) -> str:
         spec_strs = ",\n\t\t".join(repr(spec) for spec in self._specs)
+        shape_str = f", shape={repr(self._shape)}" if self._shape else ""
         return textwrap.dedent(
             f"""FeatureGroupSpec(
                 name={repr(self._name)},
                 specs=[
                     {spec_strs}
-                ]
+                ]{shape_str}
             )
             """
         )
+    def as_dtype(self, force_numpy_dtype: bool = False) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
+        return np.object_
     def to_dict(self) -> Dict[str, Any]:
         """Serialize the feature group into a dict.
         Returns:
             A dict that serializes the feature group.
         """
-        return {"feature_group": {"name": self._name, "specs": [s.to_dict() for s in self._specs]}}
+        base_dict: Dict[str, Any] = {"name": self._name, "specs": [s.to_dict() for s in self._specs]}
+        if self._shape is not None:
+            base_dict["shape"] = self._shape
+        return base_dict
     @classmethod
     def from_dict(cls, input_dict: Dict[str, Any]) -> "FeatureGroupSpec":
@@ -462,10 +480,13 @@ class FeatureGroupSpec(BaseFeatureSpec):
             A feature group instance deserialized and created from the dict.
         """
         specs = []
-        for e in input_dict["feature_group"]["specs"]:
-            spec = FeatureSpec.from_dict(e)
+        for e in input_dict["specs"]:
+            spec = FeatureGroupSpec.from_dict(e) if "specs" in e else FeatureSpec.from_dict(e)
             specs.append(spec)
-        return FeatureGroupSpec(name=input_dict["feature_group"]["name"], specs=specs)
+        shape = input_dict.get("shape", None)
+        if shape:
+            shape = tuple(shape)
+        return FeatureGroupSpec(name=input_dict["name"], specs=specs, shape=shape)
 class ModelSignature:
@@ -525,7 +546,7 @@ class ModelSignature:
         sig_inputs = loaded["inputs"]
         deserialize_spec: Callable[[Dict[str, Any]], BaseFeatureSpec] = lambda sig_spec: (
-            FeatureGroupSpec.from_dict(sig_spec) if "feature_group" in sig_spec else FeatureSpec.from_dict(sig_spec)
+            FeatureGroupSpec.from_dict(sig_spec) if "specs" in sig_spec else FeatureSpec.from_dict(sig_spec)
         )
         return ModelSignature(

snowflake/ml/model/_signatures/dmatrix_handler.py ADDED Viewed

@@ -0,0 +1,98 @@
+from typing import TYPE_CHECKING, List, Literal, Optional, Sequence
+import numpy as np
+import pandas as pd
+from typing_extensions import TypeGuard
+from snowflake.ml._internal import type_utils
+from snowflake.ml._internal.exceptions import (
+    error_codes,
+    exceptions as snowml_exceptions,
+)
+from snowflake.ml.model import type_hints as model_types
+from snowflake.ml.model._signatures import base_handler, core
+if TYPE_CHECKING:
+    import xgboost
+class XGBoostDMatrixHandler(base_handler.BaseDataHandler["xgboost.DMatrix"]):
+    @staticmethod
+    def can_handle(data: model_types.SupportedDataType) -> TypeGuard["xgboost.DMatrix"]:
+        return type_utils.LazyType("xgboost.DMatrix").isinstance(data)
+    @staticmethod
+    def count(data: "xgboost.DMatrix") -> int:
+        return data.num_row()
+    @staticmethod
+    def truncate(data: "xgboost.DMatrix", length: int) -> "xgboost.DMatrix":
+        num_rows = min(
+            XGBoostDMatrixHandler.count(data),
+            length,
+        )
+        return data.slice(list(range(num_rows)))
+    @staticmethod
+    def validate(data: "xgboost.DMatrix") -> None:
+        if data.num_row() == 0:
+            raise snowml_exceptions.SnowflakeMLException(
+                error_code=error_codes.INVALID_DATA,
+                original_exception=ValueError("Data Validation Error: Empty data is found."),
+            )
+    @staticmethod
+    def infer_signature(data: "xgboost.DMatrix", role: Literal["input", "output"]) -> Sequence[core.BaseFeatureSpec]:
+        feature_prefix = f"{XGBoostDMatrixHandler.FEATURE_PREFIX}_"
+        features: List[core.BaseFeatureSpec] = []
+        role_prefix = (
+            XGBoostDMatrixHandler.INPUT_PREFIX if role == "input" else XGBoostDMatrixHandler.OUTPUT_PREFIX
+        ) + "_"
+        feature_names = data.feature_names or []
+        feature_types = data.feature_types or []
+        for i, (feature_name, dtype) in enumerate(zip(feature_names, feature_types)):
+            if not feature_name:
+                ft_name = f"{role_prefix}{feature_prefix}{i}"
+            else:
+                ft_name = feature_name
+            features.append(core.FeatureSpec(dtype=core.DataType.from_numpy_type(np.dtype(dtype)), name=ft_name))
+        return features
+    @staticmethod
+    def convert_to_df(data: "xgboost.DMatrix", ensure_serializable: bool = True) -> pd.DataFrame:
+        df = pd.DataFrame(data.get_data().toarray(), columns=data.feature_names)
+        feature_types = data.feature_types or []
+        if feature_types:
+            for idx, col in enumerate(df.columns):
+                dtype = feature_types[idx]
+                df[col] = df[col].astype(dtype)
+        return df
+    @staticmethod
+    def convert_from_df(
+        df: pd.DataFrame, features: Optional[Sequence[core.BaseFeatureSpec]] = None
+    ) -> "xgboost.DMatrix":
+        import xgboost as xgb
+        if not features:
+            return xgb.DMatrix(df)
+        else:
+            feature_names = []
+            feature_types = []
+            for feature in features:
+                if isinstance(feature, core.FeatureGroupSpec):
+                    raise snowml_exceptions.SnowflakeMLException(
+                        error_code=error_codes.NOT_IMPLEMENTED,
+                        original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
+                    )
+                assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
+                feature_names.append(feature.name)
+                feature_types.append(feature._dtype._numpy_type)
+            return xgb.DMatrix(df, feature_names=feature_names, feature_types=feature_types)

snowflake/ml/model/_signatures/numpy_handler.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from collections import abc
-from typing import List, Literal, Sequence
+from typing import Literal, Sequence
 import numpy as np
 import pandas as pd
@@ -10,7 +10,7 @@ from snowflake.ml._internal.exceptions import (
     exceptions as snowml_exceptions,
 )
 from snowflake.ml.model import type_hints as model_types
-from snowflake.ml.model._signatures import base_handler, core
+from snowflake.ml.model._signatures import base_handler, core, pandas_handler
 class NumpyArrayHandler(base_handler.BaseDataHandler[model_types._SupportedNumpyArray]):
@@ -46,11 +46,15 @@ class NumpyArrayHandler(base_handler.BaseDataHandler[model_types._SupportedNumpy
     def infer_signature(
         data: model_types._SupportedNumpyArray, role: Literal["input", "output"]
     ) -> Sequence[core.BaseFeatureSpec]:
+        if data.dtype == np.object_:
+            return pandas_handler.PandasDataFrameHandler.infer_signature(
+                NumpyArrayHandler.convert_to_df(data), role=role
+            )
         feature_prefix = f"{NumpyArrayHandler.FEATURE_PREFIX}_"
         dtype = core.DataType.from_numpy_type(data.dtype)
         role_prefix = (NumpyArrayHandler.INPUT_PREFIX if role == "input" else NumpyArrayHandler.OUTPUT_PREFIX) + "_"
         if len(data.shape) == 1:
-            return [core.FeatureSpec(dtype=dtype, name=f"{role_prefix}{feature_prefix}0")]
+            return [core.FeatureSpec(dtype=dtype, name=f"{role_prefix}{feature_prefix}0", nullable=False)]
         else:
             # For high-dimension array, 0-axis is for batch, 1-axis is for column, further more is details of columns.
             features = []
@@ -59,9 +63,9 @@ class NumpyArrayHandler(base_handler.BaseDataHandler[model_types._SupportedNumpy
             for col_data, ft_name in zip(data[0], ft_names):
                 if isinstance(col_data, np.ndarray):
                     ft_shape = np.shape(col_data)
-                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
+                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
                 else:
-                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name))
+                    features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
             return features
     @staticmethod
@@ -108,21 +112,9 @@ class SeqOfNumpyArrayHandler(base_handler.BaseDataHandler[Sequence[model_types._
     def infer_signature(
         data: Sequence[model_types._SupportedNumpyArray], role: Literal["input", "output"]
     ) -> Sequence[core.BaseFeatureSpec]:
-        feature_prefix = f"{SeqOfNumpyArrayHandler.FEATURE_PREFIX}_"
-        features: List[core.BaseFeatureSpec] = []
-        role_prefix = (
-            SeqOfNumpyArrayHandler.INPUT_PREFIX if role == "input" else SeqOfNumpyArrayHandler.OUTPUT_PREFIX
-        ) + "_"
-        for i, data_col in enumerate(data):
-            dtype = core.DataType.from_numpy_type(data_col.dtype)
-            ft_name = f"{role_prefix}{feature_prefix}{i}"
-            if len(data_col.shape) == 1:
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name))
-            else:
-                ft_shape = tuple(data_col.shape[1:])
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
-        return features
+        return pandas_handler.PandasDataFrameHandler.infer_signature(
+            SeqOfNumpyArrayHandler.convert_to_df(data, ensure_serializable=False), role=role
+        )
     @staticmethod
     def convert_to_df(

snowflake/ml/model/_signatures/pandas_handler.py CHANGED Viewed

@@ -72,13 +72,6 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         df_col_dtypes = [data[col].dtype for col in data.columns]
         for df_col, df_col_dtype in zip(df_cols, df_col_dtypes):
             df_col_data = data[df_col]
-            if df_col_data.isnull().all():
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError(
-                        f"Data Validation Error: There is no non-null data in column {df_col}."
-                    ),
-                )
             if df_col_data.isnull().any():
                 warnings.warn(
                     (
@@ -104,25 +97,7 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
                         ),
                     )
-                if isinstance(df_col_data.iloc[0], list):
-                    arr = utils.convert_list_to_ndarray(df_col_data.iloc[0])
-                    arr_dtype = core.DataType.from_numpy_type(arr.dtype)
-                    converted_data_list = [utils.convert_list_to_ndarray(data_row) for data_row in df_col_data]
-                    if not all(
-                        core.DataType.from_numpy_type(converted_data.dtype) == arr_dtype
-                        for converted_data in converted_data_list
-                    ):
-                        raise snowml_exceptions.SnowflakeMLException(
-                            error_code=error_codes.INVALID_DATA,
-                            original_exception=ValueError(
-                                "Data Validation Error: "
-                                + f"Inconsistent type of element in object found in column data {df_col_data}."
-                            ),
-                        )
-                elif isinstance(df_col_data.iloc[0], np.ndarray):
+                if isinstance(df_col_data.iloc[0], np.ndarray):
                     arr_dtype = core.DataType.from_numpy_type(df_col_data.iloc[0].dtype)
                     if not all(core.DataType.from_numpy_type(data_row.dtype) == arr_dtype for data_row in df_col_data):
@@ -133,7 +108,7 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
                                 + f"Inconsistent type of element in object found in column data {df_col_data}."
                             ),
                         )
-                elif not isinstance(df_col_data.iloc[0], (str, bytes)):
+                elif not isinstance(df_col_data.iloc[0], (str, bytes, dict, list)):
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
                         original_exception=ValueError(
@@ -163,22 +138,38 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         specs = []
         for df_col, df_col_dtype, ft_name in zip(df_cols, df_col_dtypes, ft_names):
             df_col_data = data[df_col]
+            if df_col_data.isnull().all():
+                raise snowml_exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_DATA,
+                    original_exception=ValueError(
+                        "Data Validation Error: "
+                        f"There is no non-null data in column {df_col} so the signature cannot be inferred."
+                    ),
+                )
             if df_col_data.isnull().any():
                 df_col_data = utils.series_dropna(df_col_data)
             df_col_dtype = df_col_data.dtype
             if df_col_dtype == np.dtype("O"):
                 if isinstance(df_col_data.iloc[0], list):
-                    arr = utils.convert_list_to_ndarray(df_col_data.iloc[0])
-                    arr_dtype = core.DataType.from_numpy_type(arr.dtype)
-                    ft_shape = np.shape(df_col_data.iloc[0])
-                    converted_data_list = [utils.convert_list_to_ndarray(data_row) for data_row in df_col_data]
-                    if not all(np.shape(converted_data) == ft_shape for converted_data in converted_data_list):
-                        ft_shape = (-1,)
-                    specs.append(core.FeatureSpec(dtype=arr_dtype, name=ft_name, shape=ft_shape))
+                    spec_0 = utils.infer_list(ft_name, df_col_data.iloc[0])
+                    for i in range(1, len(df_col_data)):
+                        spec = utils.infer_list(ft_name, df_col_data.iloc[i])
+                        if spec._shape != spec_0._shape:
+                            spec_0._shape = (-1,)
+                            spec._shape = (-1,)
+                        if spec != spec_0:
+                            raise snowml_exceptions.SnowflakeMLException(
+                                error_code=error_codes.INVALID_DATA,
+                                original_exception=ValueError(
+                                    "Unable to construct signature: "
+                                    f"Ragged nested or Unsupported list-like data {df_col_data} confronted."
+                                ),
+                            )
+                    specs.append(spec_0)
+                elif isinstance(df_col_data.iloc[0], dict):
+                    specs.append(utils.infer_dict(ft_name, df_col_data.iloc[0]))
                 elif isinstance(df_col_data.iloc[0], np.ndarray):
                     arr_dtype = core.DataType.from_numpy_type(df_col_data.iloc[0].dtype)
                     ft_shape = np.shape(df_col_data.iloc[0])

snowflake/ml/model/_signatures/pytorch_handler.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from collections import abc
-from typing import TYPE_CHECKING, List, Literal, Optional, Sequence
+from typing import TYPE_CHECKING, Literal, Optional, Sequence
 import numpy as np
 import pandas as pd
@@ -11,12 +11,54 @@ from snowflake.ml._internal.exceptions import (
     exceptions as snowml_exceptions,
 )
 from snowflake.ml.model import type_hints as model_types
-from snowflake.ml.model._signatures import base_handler, core
+from snowflake.ml.model._signatures import base_handler, core, numpy_handler
 if TYPE_CHECKING:
     import torch
+class PyTorchTensorHandler(base_handler.BaseDataHandler["torch.Tensor"]):
+    @staticmethod
+    def can_handle(data: model_types.SupportedDataType) -> TypeGuard["torch.Tensor"]:
+        return type_utils.LazyType("torch.Tensor").isinstance(data)
+    @staticmethod
+    def count(data: "torch.Tensor") -> int:
+        return data.shape[0]
+    @staticmethod
+    def truncate(data: "torch.Tensor", length: int) -> "torch.Tensor":
+        return data[: min(PyTorchTensorHandler.count(data), length)]
+    @staticmethod
+    def validate(data: "torch.Tensor") -> None:
+        return numpy_handler.NumpyArrayHandler.validate(data.detach().cpu().numpy())
+    @staticmethod
+    def infer_signature(data: "torch.Tensor", role: Literal["input", "output"]) -> Sequence[core.BaseFeatureSpec]:
+        return numpy_handler.NumpyArrayHandler.infer_signature(data.detach().cpu().numpy(), role=role)
+    @staticmethod
+    def convert_to_df(data: "torch.Tensor", ensure_serializable: bool = True) -> pd.DataFrame:
+        return numpy_handler.NumpyArrayHandler.convert_to_df(
+            data.detach().cpu().numpy(), ensure_serializable=ensure_serializable
+        )
+    @staticmethod
+    def convert_from_df(df: pd.DataFrame, features: Optional[Sequence[core.BaseFeatureSpec]] = None) -> "torch.Tensor":
+        import torch
+        if features is None:
+            if any(dtype == np.dtype("O") for dtype in df.dtypes):
+                return torch.from_numpy(np.array(df.to_numpy().tolist()))
+            return torch.from_numpy(df.to_numpy())
+        assert isinstance(features[0], core.FeatureSpec)
+        return torch.from_numpy(
+            np.array(df.to_numpy().tolist(), dtype=features[0]._dtype._numpy_type),
+        )
 class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Tensor"]]):
     @staticmethod
     def can_handle(data: model_types.SupportedDataType) -> TypeGuard[Sequence["torch.Tensor"]]:
@@ -24,56 +66,28 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
             return False
         if len(data) == 0:
             return False
-        if type_utils.LazyType("torch.Tensor").isinstance(data[0]):
-            return all(type_utils.LazyType("torch.Tensor").isinstance(data_col) for data_col in data)
-        return False
+        return all(PyTorchTensorHandler.can_handle(data_col) for data_col in data)
     @staticmethod
     def count(data: Sequence["torch.Tensor"]) -> int:
-        return min(data_col.shape[0] for data_col in data)  # type: ignore[no-any-return]
+        return min(PyTorchTensorHandler.count(data_col) for data_col in data)
     @staticmethod
     def truncate(data: Sequence["torch.Tensor"], length: int) -> Sequence["torch.Tensor"]:
-        return [data_col[: min(SeqOfPyTorchTensorHandler.count(data), 10)] for data_col in data]
+        return [data_col[: min(SeqOfPyTorchTensorHandler.count(data), length)] for data_col in data]
     @staticmethod
     def validate(data: Sequence["torch.Tensor"]) -> None:
-        import torch
         for data_col in data:
-            if data_col.shape == torch.Size([0]):
-                # Empty array
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError("Data Validation Error: Empty data is found."),
-                )
-            if data_col.shape == torch.Size([1]):
-                # scalar
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_DATA,
-                    original_exception=ValueError("Data Validation Error: Scalar data is found."),
-                )
+            PyTorchTensorHandler.validate(data_col)
     @staticmethod
     def infer_signature(
         data: Sequence["torch.Tensor"], role: Literal["input", "output"]
     ) -> Sequence[core.BaseFeatureSpec]:
-        feature_prefix = f"{SeqOfPyTorchTensorHandler.FEATURE_PREFIX}_"
-        features: List[core.BaseFeatureSpec] = []
-        role_prefix = (
-            SeqOfPyTorchTensorHandler.INPUT_PREFIX if role == "input" else SeqOfPyTorchTensorHandler.OUTPUT_PREFIX
-        ) + "_"
-        for i, data_col in enumerate(data):
-            dtype = core.DataType.from_torch_type(data_col.dtype)
-            ft_name = f"{role_prefix}{feature_prefix}{i}"
-            if len(data_col.shape) == 1:
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
-            else:
-                ft_shape = tuple(data_col.shape[1:])
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
-        return features
+        return numpy_handler.SeqOfNumpyArrayHandler.infer_signature(
+            [data_col.detach().cpu().numpy() for data_col in data], role=role
+        )
     @staticmethod
     def convert_to_df(data: Sequence["torch.Tensor"], ensure_serializable: bool = True) -> pd.DataFrame:
@@ -81,8 +95,8 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
         # the content is still numpy array so that the type could be preserved.
         # But that would not serializable and cannot use as UDF input and output.
         if ensure_serializable:
-            return pd.DataFrame({i: data_col.detach().to("cpu").numpy().tolist() for i, data_col in enumerate(data)})
-        return pd.DataFrame({i: list(data_col.detach().to("cpu").numpy()) for i, data_col in enumerate(data)})
+            return pd.DataFrame({i: data_col.detach().cpu().numpy().tolist() for i, data_col in enumerate(data)})
+        return pd.DataFrame({i: list(data_col.detach().cpu().numpy()) for i, data_col in enumerate(data)})
     @staticmethod
     def convert_from_df(
@@ -95,8 +109,10 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
             for feature in features:
                 if isinstance(feature, core.FeatureGroupSpec):
                     raise snowml_exceptions.SnowflakeMLException(
-                        error_code=error_codes.NOT_IMPLEMENTED,
-                        original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
+                        error_code=error_codes.INVALID_DATA_TYPE,
+                        original_exception=NotImplementedError(
+                            "FeatureGroupSpec is not supported when converting to Tensorflow tensor."
+                        ),
                     )
                 assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
                 res.append(torch.from_numpy(np.stack(df[feature.name].to_numpy()).astype(feature._dtype._numpy_type)))

snowflake/ml/model/_signatures/snowpark_handler.py CHANGED Viewed

@@ -65,12 +65,6 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
         dtype_map = {}
         if features:
             for feature in features:
-                if isinstance(feature, core.FeatureGroupSpec):
-                    raise snowml_exceptions.SnowflakeMLException(
-                        error_code=error_codes.NOT_IMPLEMENTED,
-                        original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
-                    )
-                assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
                 dtype_map[feature.name] = feature.as_dtype()
         df_local = data.to_pandas()
@@ -122,12 +116,6 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
         column_names = []
         columns = []
         for feature in features:
-            if isinstance(feature, core.FeatureGroupSpec):
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.NOT_IMPLEMENTED,
-                    original_exception=NotImplementedError("FeatureGroupSpec is not supported."),
-                )
-            assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
             column_names.append(identifier.get_inferred_name(feature.name))
             columns.append(F.col(identifier.get_inferred_name(feature.name)).cast(feature.as_snowpark_type()))

snowflake-ml-python 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl

snowflake-ml-python 1.7.4__py3-none-any.whl → 1.8.0__py3-none-any.whl