PyPI - snowflake-ml-python - Versions diffs - 1.6.4__py3-none-any.whl → 1.7.1__py3-none-any.whl - Mend

snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (176) hide show

snowflake/ml/model/_packager/model_handlers/xgboost.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # mypy: disable-error-code="import"
 import os
 import warnings
-from importlib import metadata as importlib_metadata
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -16,23 +15,19 @@ from typing import (
 import numpy as np
 import pandas as pd
-from packaging import version
 from typing_extensions import TypeGuard, Unpack
 from snowflake.ml._internal import type_utils
 from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_env import model_env
-from snowflake.ml.model._packager.model_handlers import (
-    _base,
-    _utils as handlers_utils,
-    model_objective_utils,
-)
+from snowflake.ml.model._packager.model_handlers import _base, _utils as handlers_utils
 from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
     model_meta as model_meta_api,
     model_meta_schema,
 )
+from snowflake.ml.model._packager.model_task import model_task_utils
 from snowflake.ml.model._signatures import numpy_handler, utils as model_signature_utils
 if TYPE_CHECKING:
@@ -94,23 +89,6 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
         assert isinstance(model, xgboost.Booster) or isinstance(model, xgboost.XGBModel)
-        local_xgb_version = None
-        try:
-            local_dist = importlib_metadata.distribution("xgboost")
-            local_xgb_version = version.parse(local_dist.version)
-        except importlib_metadata.PackageNotFoundError:
-            pass
-        if local_xgb_version and local_xgb_version >= version.parse("2.1.0") and enable_explainability:
-            warnings.warn(
-                f"This version of xgboost {local_xgb_version} does not work with shap 0.42.1."
-                + "If you want model explanations, lower the xgboost version to <2.1.0.",
-                category=UserWarning,
-                stacklevel=1,
-            )
-            enable_explainability = False
         if not is_sub_model:
             target_methods = handlers_utils.get_target_methods(
                 model=model,
@@ -139,7 +117,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
-            model_task_and_output = model_objective_utils.get_model_task_and_output_type(model)
+            model_task_and_output = model_task_utils.get_model_task_and_output_type(model)
             model_meta.task = handlers_utils.validate_model_task(model_meta.task, model_task_and_output.task)
             if enable_explainability:
                 model_meta = handlers_utils.add_explain_method_signature(
@@ -187,23 +165,15 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
             ],
             check_local_version=True,
         )
-        if local_xgb_version and local_xgb_version >= version.parse("2.0.0") and enable_explainability:
-            model_meta.env.include_if_absent(
-                [
-                    model_env.ModelDependency(requirement="xgboost==2.0.*", pip_name="xgboost"),
-                ],
-                check_local_version=False,
-            )
-        else:
-            model_meta.env.include_if_absent(
-                [
-                    model_env.ModelDependency(requirement="xgboost", pip_name="xgboost"),
-                ],
-                check_local_version=True,
-            )
+        model_meta.env.include_if_absent(
+            [
+                model_env.ModelDependency(requirement="xgboost", pip_name="xgboost"),
+            ],
+            check_local_version=True,
+        )
         if enable_explainability:
-            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap>=0.46.0", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
         model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)

snowflake/ml/model/_packager/model_meta/_packaging_requirements.py CHANGED Viewed

@@ -1,3 +1,2 @@
-REQUIREMENTS = [
-    "cloudpickle>=2.0.0"
-]
+REQUIREMENTS = ['cloudpickle>=2.0.0']
+ALL_REQUIREMENTS=['cloudpickle>=2.0.0']

snowflake/ml/model/_packager/model_meta/model_meta_schema.py CHANGED Viewed

@@ -58,11 +58,16 @@ class XgboostModelBlobOptions(BaseModelBlobOptions):
     xgb_estimator_type: Required[str]
+class TensorflowModelBlobOptions(BaseModelBlobOptions):
+    is_keras_model: Required[bool]
 ModelBlobOptions = Union[
     BaseModelBlobOptions,
     HuggingFacePipelineModelBlobOptions,
     MLFlowModelBlobOptions,
     XgboostModelBlobOptions,
+    TensorflowModelBlobOptions,
 ]

snowflake/ml/model/_packager/model_packager.py CHANGED Viewed

@@ -61,17 +61,6 @@ class ModelPackager:
         if not options:
             options = model_types.BaseModelSaveOption()
-        # here handling the case of enable_explainability is False/None
-        enable_explainability = options.get("enable_explainability", None)
-        if enable_explainability is False or enable_explainability is None:
-            if (signatures is not None) and (sample_input_data is not None):
-                raise snowml_exceptions.SnowflakeMLException(
-                    error_code=error_codes.INVALID_ARGUMENT,
-                    original_exception=ValueError(
-                        "Signatures and sample_input_data both cannot be specified at the same time."
-                    ),
-                )
         handler = model_handler.find_handler(model)
         if handler is None:
             raise snowml_exceptions.SnowflakeMLException(

snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py CHANGED Viewed

@@ -1,10 +1,2 @@
-REQUIREMENTS = [
-    "absl-py>=0.15,<2",
-    "anyio>=3.5.0,<4",
-    "numpy>=1.23,<2",
-    "packaging>=20.9,<24",
-    "pandas>=1.0.0,<3",
-    "pyyaml>=6.0,<7",
-    "snowflake-snowpark-python>=1.17.0,<2",
-    "typing-extensions>=4.1.0,<5"
-]
+REQUIREMENTS = ['absl-py>=0.15,<2', 'aiohttp!=4.0.0a0, !=4.0.0a1', 'anyio>=3.5.0,<4', 'cachetools>=3.1.1,<6', 'cloudpickle>=2.0.0', 'cryptography', 'fsspec>=2022.11,<2024', 'importlib_resources>=6.1.1, <7', 'numpy>=1.23,<2', 'packaging>=20.9,<25', 'pandas>=1.0.0,<3', 'pyarrow', 'pytimeparse>=1.1.8,<2', 'pyyaml>=6.0,<7', 'requests', 'retrying>=1.3.3,<2', 's3fs>=2022.11,<2024', 'scikit-learn>=1.4,<1.6', 'scipy>=1.9,<2', 'snowflake-connector-python>=3.5.0,<4', 'snowflake-snowpark-python>=1.17.0,<2', 'sqlparse>=0.4,<1', 'typing-extensions>=4.1.0,<5', 'xgboost>=1.7.3,<3']
+ALL_REQUIREMENTS=['absl-py>=0.15,<2', 'aiohttp!=4.0.0a0, !=4.0.0a1', 'anyio>=3.5.0,<4', 'cachetools>=3.1.1,<6', 'catboost>=1.2.0, <2', 'cloudpickle>=2.0.0', 'cryptography', 'fsspec>=2022.11,<2024', 'importlib_resources>=6.1.1, <7', 'lightgbm>=4.1.0, <5', 'mlflow>=2.1.0,<2.4', 'numpy>=1.23,<2', 'packaging>=20.9,<25', 'pandas>=1.0.0,<3', 'pyarrow', 'pytimeparse>=1.1.8,<2', 'pytorch>=2.0.1,<2.3.0', 'pyyaml>=6.0,<7', 'requests', 'retrying>=1.3.3,<2', 's3fs>=2022.11,<2024', 'scikit-learn>=1.4,<1.6', 'scipy>=1.9,<2', 'sentence-transformers>=2.2.2,<3', 'sentencepiece>=0.1.95,<1', 'shap>=0.46.0,<1', 'snowflake-connector-python>=3.5.0,<4', 'snowflake-snowpark-python>=1.17.0,<2', 'sqlparse>=0.4,<1', 'tensorflow>=2.10,<3', 'tokenizers>=0.10,<1', 'torchdata>=0.4,<1', 'transformers>=4.32.1,<5', 'typing-extensions>=4.1.0,<5', 'xgboost>=1.7.3,<3']

snowflake/ml/model/_packager/model_runtime/model_runtime.py CHANGED Viewed

@@ -17,6 +17,8 @@ _SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES = [
     for r in _snowml_inference_alternative_requirements.REQUIREMENTS
 ]
+PACKAGES_NOT_ALLOWED_IN_WAREHOUSE = ["snowflake-connector-python", "pyarrow"]
 class ModelRuntime:
     """Class to represent runtime in a model, which controls the runtime and version, imports and dependencies.
@@ -61,15 +63,8 @@ class ModelRuntime:
             ],
         )
-        if not is_warehouse and self.embed_local_ml_library:
-            self.runtime_env.include_if_absent(
-                [
-                    model_env.ModelDependency(
-                        requirement="pyarrow",
-                        pip_name="pyarrow",
-                    )
-                ],
-            )
+        if is_warehouse and self.embed_local_ml_library:
+            self.runtime_env.remove_if_present_conda(PACKAGES_NOT_ALLOWED_IN_WAREHOUSE)
         if is_gpu:
             self.runtime_env.generate_env_for_cuda()

snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} RENAMED Viewed

@@ -84,7 +84,7 @@ def get_model_task_lightgbm(model: Union["lightgbm.Booster", "lightgbm.LGBMModel
     if type_utils.LazyType("lightgbm.Booster").isinstance(model):
         model_task = model.params["objective"]  # type: ignore[attr-defined]
     elif hasattr(model, "objective_"):
-        model_task = model.objective_
+        model_task = model.objective_  # type: ignore[assignment]
     if model_task in _BINARY_CLASSIFICATION_OBJECTIVES:
         return type_hints.Task.TABULAR_BINARY_CLASSIFICATION
     if model_task in _MULTI_CLASSIFICATION_OBJECTIVES:
@@ -128,42 +128,30 @@ def get_model_task_xgb(model: Union["xgboost.Booster", "xgboost.XGBModel"]) -> t
     return type_hints.Task.UNKNOWN
-def get_model_task_and_output_type(model: Any) -> ModelTaskAndOutputType:
+def _get_model_task(model: Any) -> type_hints.Task:
     if type_utils.LazyType("xgboost.Booster").isinstance(model) or type_utils.LazyType("xgboost.XGBModel").isinstance(
         model
     ):
-        task = get_model_task_xgb(model)
-        output_type = model_signature.DataType.DOUBLE
-        if task == type_hints.Task.TABULAR_MULTI_CLASSIFICATION:
-            output_type = model_signature.DataType.STRING
-        return ModelTaskAndOutputType(task=task, output_type=output_type)
+        return get_model_task_xgb(model)
     if type_utils.LazyType("lightgbm.Booster").isinstance(model) or type_utils.LazyType(
         "lightgbm.LGBMModel"
     ).isinstance(model):
-        task = get_model_task_lightgbm(model)
-        output_type = model_signature.DataType.DOUBLE
-        if task in [
-            type_hints.Task.TABULAR_BINARY_CLASSIFICATION,
-            type_hints.Task.TABULAR_MULTI_CLASSIFICATION,
-        ]:
-            output_type = model_signature.DataType.STRING
-        return ModelTaskAndOutputType(task=task, output_type=output_type)
+        return get_model_task_lightgbm(model)
     if type_utils.LazyType("catboost.CatBoost").isinstance(model):
-        task = get_model_task_catboost(model)
-        output_type = model_signature.DataType.DOUBLE
-        if task == type_hints.Task.TABULAR_MULTI_CLASSIFICATION:
-            output_type = model_signature.DataType.STRING
-        return ModelTaskAndOutputType(task=task, output_type=output_type)
+        return get_model_task_catboost(model)
     if type_utils.LazyType("sklearn.base.BaseEstimator").isinstance(model) or type_utils.LazyType(
         "sklearn.pipeline.Pipeline"
     ).isinstance(model):
-        task = get_task_skl(model)
-        output_type = model_signature.DataType.DOUBLE
-        if task == type_hints.Task.TABULAR_MULTI_CLASSIFICATION:
-            output_type = model_signature.DataType.STRING
-        return ModelTaskAndOutputType(task=task, output_type=output_type)
+        return get_task_skl(model)
     raise ValueError(f"Model type {type(model)} is not supported")
+def get_model_task_and_output_type(model: Any) -> ModelTaskAndOutputType:
+    task = _get_model_task(model)
+    output_type = model_signature.DataType.DOUBLE
+    if task == type_hints.Task.TABULAR_MULTI_CLASSIFICATION:
+        output_type = model_signature.DataType.STRING
+    return ModelTaskAndOutputType(task=task, output_type=output_type)

snowflake/ml/model/_signatures/core.py CHANGED Viewed

@@ -14,10 +14,12 @@ from typing import (
     Type,
     Union,
     final,
+    get_args,
 )
 import numpy as np
 import numpy.typing as npt
+import pandas as pd
 import snowflake.snowpark.types as spt
 from snowflake.ml._internal.exceptions import (
@@ -29,6 +31,21 @@ if TYPE_CHECKING:
     import mlflow
     import torch
+PandasExtensionTypes = Union[
+    pd.Int8Dtype,
+    pd.Int16Dtype,
+    pd.Int32Dtype,
+    pd.Int64Dtype,
+    pd.UInt8Dtype,
+    pd.UInt16Dtype,
+    pd.UInt32Dtype,
+    pd.UInt64Dtype,
+    pd.Float32Dtype,
+    pd.Float64Dtype,
+    pd.BooleanDtype,
+    pd.StringDtype,
+]
 class DataType(Enum):
     def __init__(self, value: str, snowpark_type: Type[spt.DataType], numpy_type: npt.DTypeLike) -> None:
@@ -67,11 +84,11 @@ class DataType(Enum):
         return f"DataType.{self.name}"
     @classmethod
-    def from_numpy_type(cls, np_type: npt.DTypeLike) -> "DataType":
+    def from_numpy_type(cls, input_type: Union[npt.DTypeLike, PandasExtensionTypes]) -> "DataType":
         """Translate numpy dtype to DataType for signature definition.
         Args:
-            np_type: The numpy dtype.
+            input_type: The numpy dtype or Pandas Extension Dtype
         Raises:
             SnowflakeMLException: NotImplementedError: Raised when the given numpy type is not supported.
@@ -79,6 +96,10 @@ class DataType(Enum):
         Returns:
             Corresponding DataType.
         """
+        # To support pandas extension dtype
+        if isinstance(input_type, get_args(PandasExtensionTypes)):
+            input_type = input_type.type
         np_to_snowml_type_mapping = {i._numpy_type: i for i in DataType}
         # Add datetime types:
@@ -88,12 +109,12 @@ class DataType(Enum):
             np_to_snowml_type_mapping[f"datetime64[{res}]"] = DataType.TIMESTAMP_NTZ
         for potential_type in np_to_snowml_type_mapping.keys():
-            if np.can_cast(np_type, potential_type, casting="no"):
+            if np.can_cast(input_type, potential_type, casting="no"):
                 # This is used since the same dtype might represented in different ways.
                 return np_to_snowml_type_mapping[potential_type]
         raise snowml_exceptions.SnowflakeMLException(
             error_code=error_codes.NOT_IMPLEMENTED,
-            original_exception=NotImplementedError(f"Type {np_type} is not supported as a DataType."),
+            original_exception=NotImplementedError(f"Type {input_type} is not supported as a DataType."),
         )
     @classmethod
@@ -212,6 +233,7 @@ class FeatureSpec(BaseFeatureSpec):
         name: str,
         dtype: DataType,
         shape: Optional[Tuple[int, ...]] = None,
+        nullable: bool = True,
     ) -> None:
         """
         Initialize a feature.
@@ -219,6 +241,7 @@ class FeatureSpec(BaseFeatureSpec):
         Args:
             name: Name of the feature.
             dtype: Type of the elements in the feature.
+            nullable: Whether the feature is nullable. Defaults to True.
             shape: Used to represent scalar feature, 1-d feature list,
                 or n-d tensor. Use -1 to represent variable length. Defaults to None.
@@ -227,6 +250,7 @@ class FeatureSpec(BaseFeatureSpec):
                     - (2,): 1d list with a fixed length of 2.
                     - (-1,): 1d list with variable length, used for ragged tensor representation.
                     - (d1, d2, d3): 3d tensor.
+            nullable: Whether the feature is nullable. Defaults to True.
         Raises:
             SnowflakeMLException: TypeError: When the dtype input type is incorrect.
@@ -248,6 +272,8 @@ class FeatureSpec(BaseFeatureSpec):
             )
         self._shape = shape
+        self._nullable = nullable
     def as_snowpark_type(self) -> spt.DataType:
         result_type = self._dtype.as_snowpark_type()
         if not self._shape:
@@ -256,13 +282,34 @@ class FeatureSpec(BaseFeatureSpec):
             result_type = spt.ArrayType(result_type)
         return result_type
-    def as_dtype(self) -> Union[npt.DTypeLike, str]:
+    def as_dtype(self) -> Union[npt.DTypeLike, str, PandasExtensionTypes]:
         """Convert to corresponding local Type."""
         if not self._shape:
             # scalar dtype: use keys from `np.sctypeDict` to prevent unit-less dtype 'datetime64'
             if "datetime64" in self._dtype._value:
                 return self._dtype._value
-            return self._dtype._numpy_type
+            np_type = self._dtype._numpy_type
+            if self._nullable:
+                np_to_pd_dtype_mapping = {
+                    np.int8: pd.Int8Dtype(),
+                    np.int16: pd.Int16Dtype(),
+                    np.int32: pd.Int32Dtype(),
+                    np.int64: pd.Int64Dtype(),
+                    np.uint8: pd.UInt8Dtype(),
+                    np.uint16: pd.UInt16Dtype(),
+                    np.uint32: pd.UInt32Dtype(),
+                    np.uint64: pd.UInt64Dtype(),
+                    np.float32: pd.Float32Dtype(),
+                    np.float64: pd.Float64Dtype(),
+                    np.bool_: pd.BooleanDtype(),
+                    np.str_: pd.StringDtype(),
+                }
+                return np_to_pd_dtype_mapping.get(np_type, np_type)  # type: ignore[arg-type]
+            return np_type
         return np.object_
     def __eq__(self, other: object) -> bool:
@@ -273,7 +320,10 @@ class FeatureSpec(BaseFeatureSpec):
     def __repr__(self) -> str:
         shape_str = f", shape={repr(self._shape)}" if self._shape else ""
-        return f"FeatureSpec(dtype={repr(self._dtype)}, name={repr(self._name)}{shape_str})"
+        return (
+            f"FeatureSpec(dtype={repr(self._dtype)}, "
+            f"name={repr(self._name)}{shape_str}, nullable={repr(self._nullable)})"
+        )
     def to_dict(self) -> Dict[str, Any]:
         """Serialize the feature group into a dict.
@@ -281,10 +331,7 @@ class FeatureSpec(BaseFeatureSpec):
         Returns:
             A dict that serializes the feature group.
         """
-        base_dict: Dict[str, Any] = {
-            "type": self._dtype.name,
-            "name": self._name,
-        }
+        base_dict: Dict[str, Any] = {"type": self._dtype.name, "name": self._name, "nullable": self._nullable}
         if self._shape is not None:
             base_dict["shape"] = self._shape
         return base_dict
@@ -304,7 +351,9 @@ class FeatureSpec(BaseFeatureSpec):
         if shape:
             shape = tuple(shape)
         type = DataType[input_dict["type"]]
-        return FeatureSpec(name=name, dtype=type, shape=shape)
+        # If nullable is not provided, default to False for backward compatibility.
+        nullable = input_dict.get("nullable", False)
+        return FeatureSpec(name=name, dtype=type, shape=shape, nullable=nullable)
     @classmethod
     def from_mlflow_spec(
@@ -475,10 +524,8 @@ class ModelSignature:
         sig_outs = loaded["outputs"]
         sig_inputs = loaded["inputs"]
-        deserialize_spec: Callable[[Dict[str, Any]], BaseFeatureSpec] = (
-            lambda sig_spec: FeatureGroupSpec.from_dict(sig_spec)
-            if "feature_group" in sig_spec
-            else FeatureSpec.from_dict(sig_spec)
+        deserialize_spec: Callable[[Dict[str, Any]], BaseFeatureSpec] = lambda sig_spec: (
+            FeatureGroupSpec.from_dict(sig_spec) if "feature_group" in sig_spec else FeatureSpec.from_dict(sig_spec)
         )
         return ModelSignature(

snowflake/ml/model/_signatures/pandas_handler.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Literal, Sequence
+import warnings
+from typing import Literal, Sequence, Union
 import numpy as np
 import pandas as pd
@@ -14,8 +15,8 @@ from snowflake.ml.model._signatures import base_handler, core, utils
 class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
     @staticmethod
-    def can_handle(data: model_types.SupportedDataType) -> TypeGuard[pd.DataFrame]:
-        return isinstance(data, pd.DataFrame)
+    def can_handle(data: model_types.SupportedDataType) -> TypeGuard[Union[pd.DataFrame, pd.Series]]:
+        return isinstance(data, pd.DataFrame) or isinstance(data, pd.Series)
     @staticmethod
     def count(data: pd.DataFrame) -> int:
@@ -26,7 +27,17 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         return data.head(min(PandasDataFrameHandler.count(data), PandasDataFrameHandler.SIG_INFER_ROWS_COUNT_LIMIT))
     @staticmethod
-    def validate(data: pd.DataFrame) -> None:
+    def validate(data: Union[pd.DataFrame, pd.Series]) -> None:
+        if isinstance(data, pd.Series):
+            # check if the series is empty and throw error
+            if data.empty:
+                raise snowml_exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_DATA,
+                    original_exception=ValueError("Data Validation Error: Empty data is found."),
+                )
+            # convert the series to a dataframe
+            data = data.to_frame()
         df_cols = data.columns
         if df_cols.has_duplicates:  # Rule out categorical index with duplicates
@@ -60,21 +71,44 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         df_col_dtypes = [data[col].dtype for col in data.columns]
         for df_col, df_col_dtype in zip(df_cols, df_col_dtypes):
+            df_col_data = data[df_col]
+            if df_col_data.isnull().all():
+                raise snowml_exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_DATA,
+                    original_exception=ValueError(
+                        f"Data Validation Error: There is no non-null data in column {df_col}."
+                    ),
+                )
+            if df_col_data.isnull().any():
+                warnings.warn(
+                    (
+                        f"Null value detected in column {df_col}, model signature inference might not accurate, "
+                        "or your prediction might fail if your model does not support null input. If this is not "
+                        "expected, please check your input dataframe."
+                    ),
+                    category=UserWarning,
+                    stacklevel=2,
+                )
+                df_col_data = utils.series_dropna(df_col_data)
+                df_col_dtype = df_col_data.dtype
             if df_col_dtype == np.dtype("O"):
                 # Check if all objects have the same type
-                if not all(isinstance(data_row, type(data[df_col].iloc[0])) for data_row in data[df_col]):
+                if not all(isinstance(data_row, type(df_col_data.iloc[0])) for data_row in df_col_data):
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
                         original_exception=ValueError(
-                            f"Data Validation Error: Inconsistent type of object found in column data {data[df_col]}."
+                            "Data Validation Error: "
+                            + f"Inconsistent type of element in object found in column data {df_col_data}."
                         ),
                     )
-                if isinstance(data[df_col].iloc[0], list):
-                    arr = utils.convert_list_to_ndarray(data[df_col].iloc[0])
+                if isinstance(df_col_data.iloc[0], list):
+                    arr = utils.convert_list_to_ndarray(df_col_data.iloc[0])
                     arr_dtype = core.DataType.from_numpy_type(arr.dtype)
-                    converted_data_list = [utils.convert_list_to_ndarray(data_row) for data_row in data[df_col]]
+                    converted_data_list = [utils.convert_list_to_ndarray(data_row) for data_row in df_col_data]
                     if not all(
                         core.DataType.from_numpy_type(converted_data.dtype) == arr_dtype
@@ -84,32 +118,37 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
                             error_code=error_codes.INVALID_DATA,
                             original_exception=ValueError(
                                 "Data Validation Error: "
-                                + f"Inconsistent type of element in object found in column data {data[df_col]}."
+                                + f"Inconsistent type of element in object found in column data {df_col_data}."
                             ),
                         )
-                elif isinstance(data[df_col].iloc[0], np.ndarray):
-                    arr_dtype = core.DataType.from_numpy_type(data[df_col].iloc[0].dtype)
+                elif isinstance(df_col_data.iloc[0], np.ndarray):
+                    arr_dtype = core.DataType.from_numpy_type(df_col_data.iloc[0].dtype)
-                    if not all(core.DataType.from_numpy_type(data_row.dtype) == arr_dtype for data_row in data[df_col]):
+                    if not all(core.DataType.from_numpy_type(data_row.dtype) == arr_dtype for data_row in df_col_data):
                         raise snowml_exceptions.SnowflakeMLException(
                             error_code=error_codes.INVALID_DATA,
                             original_exception=ValueError(
                                 "Data Validation Error: "
-                                + f"Inconsistent type of element in object found in column data {data[df_col]}."
+                                + f"Inconsistent type of element in object found in column data {df_col_data}."
                             ),
                         )
-                elif not isinstance(data[df_col].iloc[0], (str, bytes)):
+                elif not isinstance(df_col_data.iloc[0], (str, bytes)):
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
                         original_exception=ValueError(
-                            f"Data Validation Error: Unsupported type confronted in {data[df_col]}"
+                            f"Data Validation Error: Unsupported type confronted in {df_col_data}"
                         ),
                     )
     @staticmethod
-    def infer_signature(data: pd.DataFrame, role: Literal["input", "output"]) -> Sequence[core.BaseFeatureSpec]:
+    def infer_signature(
+        data: Union[pd.DataFrame, pd.Series],
+        role: Literal["input", "output"],
+    ) -> Sequence[core.BaseFeatureSpec]:
         feature_prefix = f"{PandasDataFrameHandler.FEATURE_PREFIX}_"
+        if isinstance(data, pd.Series):
+            data = data.to_frame()
         df_cols = data.columns
         role_prefix = (
             PandasDataFrameHandler.INPUT_PREFIX if role == "input" else PandasDataFrameHandler.OUTPUT_PREFIX
@@ -123,30 +162,51 @@ class PandasDataFrameHandler(base_handler.BaseDataHandler[pd.DataFrame]):
         specs = []
         for df_col, df_col_dtype, ft_name in zip(df_cols, df_col_dtypes, ft_names):
+            df_col_data = data[df_col]
+            if df_col_data.isnull().any():
+                df_col_data = utils.series_dropna(df_col_data)
+            df_col_dtype = df_col_data.dtype
             if df_col_dtype == np.dtype("O"):
-                if isinstance(data[df_col].iloc[0], list):
-                    arr = utils.convert_list_to_ndarray(data[df_col].iloc[0])
+                if isinstance(df_col_data.iloc[0], list):
+                    arr = utils.convert_list_to_ndarray(df_col_data.iloc[0])
                     arr_dtype = core.DataType.from_numpy_type(arr.dtype)
-                    ft_shape = np.shape(data[df_col].iloc[0])
+                    ft_shape = np.shape(df_col_data.iloc[0])
-                    converted_data_list = [utils.convert_list_to_ndarray(data_row) for data_row in data[df_col]]
+                    converted_data_list = [utils.convert_list_to_ndarray(data_row) for data_row in df_col_data]
                     if not all(np.shape(converted_data) == ft_shape for converted_data in converted_data_list):
                         ft_shape = (-1,)
                     specs.append(core.FeatureSpec(dtype=arr_dtype, name=ft_name, shape=ft_shape))
-                elif isinstance(data[df_col].iloc[0], np.ndarray):
-                    arr_dtype = core.DataType.from_numpy_type(data[df_col].iloc[0].dtype)
-                    ft_shape = np.shape(data[df_col].iloc[0])
+                elif isinstance(df_col_data.iloc[0], np.ndarray):
+                    arr_dtype = core.DataType.from_numpy_type(df_col_data.iloc[0].dtype)
+                    ft_shape = np.shape(df_col_data.iloc[0])
-                    if not all(np.shape(data_row) == ft_shape for data_row in data[df_col]):
+                    if not all(np.shape(data_row) == ft_shape for data_row in df_col_data):
                         ft_shape = (-1,)
                     specs.append(core.FeatureSpec(dtype=arr_dtype, name=ft_name, shape=ft_shape))
-                elif isinstance(data[df_col].iloc[0], str):
+                elif isinstance(df_col_data.iloc[0], str):
                     specs.append(core.FeatureSpec(dtype=core.DataType.STRING, name=ft_name))
-                elif isinstance(data[df_col].iloc[0], bytes):
+                elif isinstance(df_col_data.iloc[0], bytes):
                     specs.append(core.FeatureSpec(dtype=core.DataType.BYTES, name=ft_name))
+            elif isinstance(df_col_dtype, pd.CategoricalDtype):
+                category_dtype = df_col_dtype.categories.dtype
+                if category_dtype == np.dtype("O"):
+                    if isinstance(df_col_dtype.categories[0], str):
+                        specs.append(core.FeatureSpec(dtype=core.DataType.STRING, name=ft_name))
+                    elif isinstance(df_col_dtype.categories[0], bytes):
+                        specs.append(core.FeatureSpec(dtype=core.DataType.BYTES, name=ft_name))
+                    else:
+                        raise snowml_exceptions.SnowflakeMLException(
+                            error_code=error_codes.INVALID_DATA,
+                            original_exception=ValueError(
+                                f"Data Validation Error: Unsupported type confronted in {df_col_dtype.categories[0]}"
+                            ),
+                        )
+                else:
+                    specs.append(core.FeatureSpec(dtype=core.DataType.from_numpy_type(category_dtype), name=ft_name))
             elif isinstance(data[df_col].iloc[0], np.datetime64):
                 specs.append(core.FeatureSpec(dtype=core.DataType.TIMESTAMP_NTZ, name=ft_name))
             else:

snowflake/ml/model/_signatures/pytorch_handler.py CHANGED Viewed

@@ -72,10 +72,10 @@ class SeqOfPyTorchTensorHandler(base_handler.BaseDataHandler[Sequence["torch.Ten
             dtype = core.DataType.from_torch_type(data_col.dtype)
             ft_name = f"{role_prefix}{feature_prefix}{i}"
             if len(data_col.shape) == 1:
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name))
+                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, nullable=False))
             else:
                 ft_shape = tuple(data_col.shape[1:])
-                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape))
+                features.append(core.FeatureSpec(dtype=dtype, name=ft_name, shape=ft_shape, nullable=False))
         return features
     @staticmethod

snowflake/ml/model/_signatures/snowpark_handler.py CHANGED Viewed

@@ -82,7 +82,8 @@ class SnowparkDataFrameHandler(base_handler.BaseDataHandler[snowflake.snowpark.D
                     identifier.get_unescaped_names(field.name)
                 ].map(json.loads)
         # Only when the feature is not from inference, we are confident to do the type casting.
-        # Otherwise, dtype_map will be empty
+        # Otherwise, dtype_map will be empty.
+        # Errors are ignored to make sure None won't be converted and won't raise Error
         df_local = df_local.astype(dtype=dtype_map)
         return df_local

snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.1__py3-none-any.whl

snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.1__py3-none-any.whl