snowflake-ml-python 1.5.4__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (100)
  1. snowflake/cortex/__init__.py +2 -0
  2. snowflake/cortex/_classify_text.py +36 -0
  3. snowflake/cortex/_complete.py +66 -35
  4. snowflake/cortex/_util.py +4 -4
  5. snowflake/ml/_internal/env_utils.py +11 -5
  6. snowflake/ml/_internal/exceptions/modeling_error_messages.py +4 -1
  7. snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
  8. snowflake/ml/_internal/telemetry.py +26 -2
  9. snowflake/ml/_internal/utils/pkg_version_utils.py +8 -22
  10. snowflake/ml/data/_internal/arrow_ingestor.py +284 -0
  11. snowflake/ml/data/data_connector.py +186 -0
  12. snowflake/ml/data/data_ingestor.py +45 -0
  13. snowflake/ml/data/data_source.py +23 -0
  14. snowflake/ml/data/ingestor_utils.py +62 -0
  15. snowflake/ml/data/torch_dataset.py +33 -0
  16. snowflake/ml/dataset/dataset.py +1 -13
  17. snowflake/ml/dataset/dataset_metadata.py +3 -1
  18. snowflake/ml/dataset/dataset_reader.py +23 -117
  19. snowflake/ml/feature_store/access_manager.py +7 -1
  20. snowflake/ml/feature_store/entity.py +19 -2
  21. snowflake/ml/feature_store/examples/airline_features/entities.py +16 -0
  22. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +31 -0
  23. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +42 -0
  24. snowflake/ml/feature_store/examples/airline_features/source.yaml +7 -0
  25. snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
  26. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +37 -0
  27. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +30 -0
  28. snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +7 -0
  29. snowflake/ml/feature_store/examples/example_helper.py +278 -0
  30. snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
  31. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +44 -0
  32. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +36 -0
  33. snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +9 -0
  34. snowflake/ml/feature_store/examples/source_data/airline.yaml +4 -0
  35. snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
  36. snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
  37. snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
  38. snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
  39. snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
  40. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +36 -0
  41. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +24 -0
  42. snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +8 -0
  43. snowflake/ml/feature_store/feature_store.py +637 -76
  44. snowflake/ml/feature_store/feature_view.py +316 -9
  45. snowflake/ml/fileset/stage_fs.py +18 -10
  46. snowflake/ml/lineage/lineage_node.py +1 -1
  47. snowflake/ml/model/_client/model/model_impl.py +11 -2
  48. snowflake/ml/model/_client/model/model_version_impl.py +171 -20
  49. snowflake/ml/model/_client/ops/model_ops.py +105 -27
  50. snowflake/ml/model/_client/ops/service_ops.py +121 -0
  51. snowflake/ml/model/_client/service/model_deployment_spec.py +95 -0
  52. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +31 -0
  53. snowflake/ml/model/_client/sql/model_version.py +13 -4
  54. snowflake/ml/model/_client/sql/service.py +129 -0
  55. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  56. snowflake/ml/model/_model_composer/model_composer.py +14 -14
  57. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +33 -17
  58. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +5 -1
  59. snowflake/ml/model/_model_composer/model_method/function_generator.py +3 -3
  60. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
  61. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +3 -27
  62. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -32
  63. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -2
  64. snowflake/ml/model/_packager/model_env/model_env.py +7 -2
  65. snowflake/ml/model/_packager/model_handlers/_base.py +30 -3
  66. snowflake/ml/model/_packager/model_handlers/_utils.py +58 -1
  67. snowflake/ml/model/_packager/model_handlers/catboost.py +52 -3
  68. snowflake/ml/model/_packager/model_handlers/custom.py +6 -2
  69. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +9 -5
  70. snowflake/ml/model/_packager/model_handlers/lightgbm.py +80 -3
  71. snowflake/ml/model/_packager/model_handlers/llm.py +7 -3
  72. snowflake/ml/model/_packager/model_handlers/mlflow.py +8 -3
  73. snowflake/ml/model/_packager/model_handlers/pytorch.py +8 -3
  74. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -3
  75. snowflake/ml/model/_packager/model_handlers/sklearn.py +87 -4
  76. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -2
  77. snowflake/ml/model/_packager/model_handlers/tensorflow.py +9 -4
  78. snowflake/ml/model/_packager/model_handlers/torchscript.py +8 -3
  79. snowflake/ml/model/_packager/model_handlers/xgboost.py +71 -3
  80. snowflake/ml/model/_packager/model_meta/model_meta.py +32 -2
  81. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +19 -0
  82. snowflake/ml/model/_packager/model_packager.py +2 -1
  83. snowflake/ml/model/_packager/model_runtime/model_runtime.py +7 -7
  84. snowflake/ml/model/model_signature.py +4 -4
  85. snowflake/ml/model/type_hints.py +2 -0
  86. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +1 -1
  87. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
  88. snowflake/ml/modeling/framework/base.py +28 -19
  89. snowflake/ml/modeling/impute/simple_imputer.py +26 -0
  90. snowflake/ml/modeling/pipeline/pipeline.py +7 -4
  91. snowflake/ml/registry/_manager/model_manager.py +16 -2
  92. snowflake/ml/registry/registry.py +100 -13
  93. snowflake/ml/utils/sql_client.py +22 -0
  94. snowflake/ml/version.py +1 -1
  95. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/METADATA +81 -2
  96. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/RECORD +99 -66
  97. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/WHEEL +1 -1
  98. snowflake/ml/_internal/lineage/data_source.py +0 -10
  99. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/LICENSE.txt +0 -0
  100. {snowflake_ml_python-1.5.4.dist-info → snowflake_ml_python-1.6.1.dist-info}/top_level.txt +0 -0
--- a/snowflake/ml/model/_packager/model_handlers/sklearn.py
+++ b/snowflake/ml/model/_packager/model_handlers/sklearn.py
@@ -6,6 +6,7 @@ import numpy as np
 import pandas as pd
 from typing_extensions import TypeGuard, Unpack
 
+import snowflake.snowpark.dataframe as sp_df
 from snowflake.ml._internal import type_utils
 from snowflake.ml.model import custom_model, model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_env import model_env
@@ -14,8 +15,13 @@ from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
     model_meta as model_meta_api,
+    model_meta_schema,
+)
+from snowflake.ml.model._signatures import (
+    numpy_handler,
+    snowpark_handler,
+    utils as model_signature_utils,
 )
-from snowflake.ml.model._signatures import numpy_handler, utils as model_signature_utils
 
 if TYPE_CHECKING:
     import sklearn.base
@@ -36,6 +42,27 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
 
     DEFAULT_TARGET_METHODS = ["predict", "transform", "predict_proba", "predict_log_proba", "decision_function"]
 
+    @classmethod
+    def get_model_objective(
+        cls, model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]
+    ) -> model_meta_schema.ModelObjective:
+        import sklearn.pipeline
+        from sklearn.base import is_classifier, is_regressor
+
+        if isinstance(model, sklearn.pipeline.Pipeline):
+            return model_meta_schema.ModelObjective.UNKNOWN
+        if is_regressor(model):
+            return model_meta_schema.ModelObjective.REGRESSION
+        if is_classifier(model):
+            classes_list = getattr(model, "classes_", [])
+            num_classes = getattr(model, "n_classes_", None) or len(classes_list)
+            if isinstance(num_classes, int):
+                if num_classes > 2:
+                    return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
+                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
+            return model_meta_schema.ModelObjective.UNKNOWN
+        return model_meta_schema.ModelObjective.UNKNOWN
+
     @classmethod
     def can_handle(
         cls,
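The new objective heuristic relies only on public scikit-learn introspection (`is_regressor`, `is_classifier`, fitted attributes), so it can be sanity-checked outside the handler. A minimal sketch, assuming scikit-learn is installed locally; the estimator and dataset are illustrative:

    # Mirror of the objective heuristic above, run against an example classifier.
    from sklearn.base import is_classifier, is_regressor
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X, y = make_classification(n_classes=3, n_informative=4, random_state=0)
    model = LogisticRegression(max_iter=500).fit(X, y)

    assert is_classifier(model) and not is_regressor(model)
    # classes_ is set by fit(); n_classes_ only exists on some estimator types.
    num_classes = getattr(model, "n_classes_", None) or len(getattr(model, "classes_", []))
    print("multi" if num_classes > 2 else "binary")  # prints "multi"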
@@ -79,11 +106,33 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.SKLModelSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", False)
+
         import sklearn.base
         import sklearn.pipeline
 
         assert isinstance(model, sklearn.base.BaseEstimator) or isinstance(model, sklearn.pipeline.Pipeline)
 
+        enable_explainability = kwargs.get("enable_explainability", False)
+        if enable_explainability:
+            # TODO: Currently limited to pandas df, need to extend to other types.
+            if sample_input_data is None or not (
+                isinstance(sample_input_data, pd.DataFrame) or isinstance(sample_input_data, sp_df.DataFrame)
+            ):
+                raise ValueError(
+                    "Sample input data is required to enable explainability. Currently we only support this for "
+                    + "`pandas.DataFrame` and `snowflake.snowpark.dataframe.DataFrame`."
+                )
+            sample_input_data_pandas = (
+                sample_input_data
+                if isinstance(sample_input_data, pd.DataFrame)
+                else snowpark_handler.SnowparkDataFrameHandler.convert_to_df(sample_input_data)
+            )
+            data_blob_path = os.path.join(model_blobs_dir_path, cls.EXPLAIN_ARTIFACTS_DIR)
+            os.makedirs(data_blob_path, exist_ok=True)
+            with open(os.path.join(data_blob_path, name + cls.BG_DATA_FILE_SUFFIX), "wb") as f:
+                sample_input_data_pandas.to_parquet(f)
+
         if not is_sub_model:
             target_methods = handlers_utils.get_target_methods(
                 model=model,
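From the caller's side this path is reached through the save options. A hedged sketch of logging a scikit-learn model with explainability enabled, assuming the public `Registry` API of this release; `session`, the estimator, and the sample frame are placeholders:

    # Sketch: the `enable_explainability` option read by the handler above is
    # passed through `options`; sample_input_data becomes the background data.
    from snowflake.ml.registry import Registry

    reg = Registry(session=session)  # an existing snowflake.snowpark.Session
    mv = reg.log_model(
        model,                                  # fitted sklearn estimator or Pipeline
        model_name="MY_SKLEARN_MODEL",
        version_name="V1",
        sample_input_data=train_df,             # pandas or Snowpark DataFrame, required here
        options={"enable_explainability": True},
    )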
@@ -110,19 +159,36 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
             get_prediction_fn=get_prediction,
         )
 
+        if enable_explainability:
+            output_type = model_signature.DataType.DOUBLE
+            if cls.get_model_objective(model) == model_meta_schema.ModelObjective.MULTI_CLASSIFICATION:
+                output_type = model_signature.DataType.STRING
+            model_meta = handlers_utils.add_explain_method_signature(
+                model_meta=model_meta,
+                explain_method="explain",
+                target_method="predict",
+                output_return_type=output_type,
+            )
+
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
-        with open(os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR), "wb") as f:
+        with open(os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR), "wb") as f:
             cloudpickle.dump(model, f)
         base_meta = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
-            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            path=cls.MODEL_BLOB_FILE_OR_DIR,
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
 
+        if enable_explainability:
+            model_meta.env.include_if_absent(
+                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
+                check_local_version=True,
+            )
+
         model_meta.env.include_if_absent(
             [model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn")], check_local_version=True
         )
@@ -153,6 +219,7 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
         cls,
         raw_model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
         model_meta: model_meta_api.ModelMetadata,
+        background_data: Optional[pd.DataFrame] = None,
         **kwargs: Unpack[model_types.SKLModelLoadOptions],
     ) -> custom_model.CustomModel:
         from snowflake.ml.model import custom_model
@@ -165,6 +232,7 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
             raw_model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
             signature: model_signature.ModelSignature,
             target_method: str,
+            background_data: Optional[pd.DataFrame],
         ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
             @custom_model.inference_api
             def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
@@ -179,11 +247,26 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
 
                 return model_signature_utils.rename_pandas_df(df, signature.outputs)
 
+            @custom_model.inference_api
+            def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                import shap
+
+                # TODO: if not resolved by explainer, we need to pass the callable function
+                try:
+                    explainer = shap.Explainer(raw_model, background_data)
+                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X).values)
+                except TypeError as e:
+                    raise ValueError(f"Explanation for this model type not supported yet: {str(e)}")
+                return model_signature_utils.rename_pandas_df(df, signature.outputs)
+
+            if target_method == "explain":
+                return explain_fn
+
             return fn
 
         type_method_dict = {}
         for target_method_name, sig in model_meta.signatures.items():
-            type_method_dict[target_method_name] = fn_factory(raw_model, sig, target_method_name)
+            type_method_dict[target_method_name] = fn_factory(raw_model, sig, target_method_name, background_data)
 
         _SKLModel = type(
             "_SKLModel",
--- a/snowflake/ml/model/_packager/model_handlers/snowmlmodel.py
+++ b/snowflake/ml/model/_packager/model_handlers/snowmlmodel.py
@@ -73,6 +73,10 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.SNOWModelSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", False)
+        if enable_explainability:
+            raise NotImplementedError("Explainability is not supported for Snowpark ML model.")
+
         from snowflake.ml.modeling.framework.base import BaseEstimator
 
         assert isinstance(model, BaseEstimator)
@@ -103,13 +107,13 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
 
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
-        with open(os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR), "wb") as f:
+        with open(os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR), "wb") as f:
             cloudpickle.dump(model, f)
         base_meta = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
-            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            path=cls.MODEL_BLOB_FILE_OR_DIR,
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
@@ -146,6 +150,7 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         cls,
         raw_model: "BaseEstimator",
         model_meta: model_meta_api.ModelMetadata,
+        background_data: Optional[pd.DataFrame] = None,
         **kwargs: Unpack[model_types.SNOWModelLoadOptions],
     ) -> custom_model.CustomModel:
         from snowflake.ml.model import custom_model
--- a/snowflake/ml/model/_packager/model_handlers/tensorflow.py
+++ b/snowflake/ml/model/_packager/model_handlers/tensorflow.py
@@ -36,7 +36,7 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
     _MIN_SNOWPARK_ML_VERSION = "1.0.12"
     _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
 
-    MODELE_BLOB_FILE_OR_DIR = "model"
+    MODEL_BLOB_FILE_OR_DIR = "model"
     DEFAULT_TARGET_METHODS = ["__call__"]
 
     @classmethod
@@ -68,6 +68,10 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.TensorflowSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", False)
+        if enable_explainability:
+            raise NotImplementedError("Explainability is not supported for Tensorflow model.")
+
         import tensorflow
 
         assert isinstance(model, tensorflow.Module)
@@ -114,15 +118,15 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
         if isinstance(model, tensorflow.keras.Model):
-            tensorflow.keras.models.save_model(model, os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR))
+            tensorflow.keras.models.save_model(model, os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR))
         else:
-            tensorflow.saved_model.save(model, os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR))
+            tensorflow.saved_model.save(model, os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR))
 
         base_meta = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
-            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            path=cls.MODEL_BLOB_FILE_OR_DIR,
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
@@ -156,6 +160,7 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         cls,
         raw_model: "tensorflow.Module",
         model_meta: model_meta_api.ModelMetadata,
+        background_data: Optional[pd.DataFrame] = None,
         **kwargs: Unpack[model_types.TensorflowLoadOptions],
     ) -> custom_model.CustomModel:
         import tensorflow
--- a/snowflake/ml/model/_packager/model_handlers/torchscript.py
+++ b/snowflake/ml/model/_packager/model_handlers/torchscript.py
@@ -34,7 +34,7 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
     _MIN_SNOWPARK_ML_VERSION = "1.0.12"
     _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
 
-    MODELE_BLOB_FILE_OR_DIR = "model.pt"
+    MODEL_BLOB_FILE_OR_DIR = "model.pt"
     DEFAULT_TARGET_METHODS = ["forward"]
 
     @classmethod
@@ -66,6 +66,10 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
         is_sub_model: Optional[bool] = False,
         **kwargs: Unpack[model_types.TorchScriptSaveOptions],
     ) -> None:
+        enable_explainability = kwargs.get("enable_explainability", False)
+        if enable_explainability:
+            raise NotImplementedError("Explainability is not supported for Torch Script model.")
+
         import torch
 
         assert isinstance(model, torch.jit.ScriptModule)  # type:ignore[attr-defined]
@@ -106,13 +110,13 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
 
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
-        with open(os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR), "wb") as f:
+        with open(os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR), "wb") as f:
             torch.jit.save(model, f)  # type:ignore[attr-defined]
         base_meta = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
-            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            path=cls.MODEL_BLOB_FILE_OR_DIR,
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
@@ -152,6 +156,7 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):  # t
         cls,
         raw_model: "torch.jit.ScriptModule",  # type:ignore[name-defined]
         model_meta: model_meta_api.ModelMetadata,
+        background_data: Optional[pd.DataFrame] = None,
         **kwargs: Unpack[model_types.TorchScriptLoadOptions],
     ) -> custom_model.CustomModel:
         from snowflake.ml.model import custom_model
--- a/snowflake/ml/model/_packager/model_handlers/xgboost.py
+++ b/snowflake/ml/model/_packager/model_handlers/xgboost.py
@@ -1,4 +1,5 @@
 # mypy: disable-error-code="import"
+import json
 import os
 from typing import (
     TYPE_CHECKING,
@@ -44,8 +45,43 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
     _MIN_SNOWPARK_ML_VERSION = "1.0.12"
     _HANDLER_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelHandlerMigrator]] = {}
 
-    MODELE_BLOB_FILE_OR_DIR = "model.ubj"
+    MODEL_BLOB_FILE_OR_DIR = "model.ubj"
     DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
+    _BINARY_CLASSIFICATION_OBJECTIVE_PREFIX = ["binary:"]
+    _MULTI_CLASSIFICATION_OBJECTIVE_PREFIX = ["multi:"]
+    _RANKING_OBJECTIVE_PREFIX = ["rank:"]
+    _REGRESSION_OBJECTIVE_PREFIX = ["reg:"]
+
+    @classmethod
+    def get_model_objective(
+        cls, model: Union["xgboost.Booster", "xgboost.XGBModel"]
+    ) -> model_meta_schema.ModelObjective:
+        import xgboost
+
+        if isinstance(model, xgboost.XGBClassifier) or isinstance(model, xgboost.XGBRFClassifier):
+            num_classes = handlers_utils.get_num_classes_if_exists(model)
+            if num_classes == 2:
+                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
+            return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
+        if isinstance(model, xgboost.XGBRegressor) or isinstance(model, xgboost.XGBRFRegressor):
+            return model_meta_schema.ModelObjective.REGRESSION
+        if isinstance(model, xgboost.XGBRanker):
+            return model_meta_schema.ModelObjective.RANKING
+        model_params = json.loads(model.save_config())
+        model_objective = model_params["learner"]["objective"]
+        for classification_objective in cls._BINARY_CLASSIFICATION_OBJECTIVE_PREFIX:
+            if classification_objective in model_objective:
+                return model_meta_schema.ModelObjective.BINARY_CLASSIFICATION
+        for classification_objective in cls._MULTI_CLASSIFICATION_OBJECTIVE_PREFIX:
+            if classification_objective in model_objective:
+                return model_meta_schema.ModelObjective.MULTI_CLASSIFICATION
+        for ranking_objective in cls._RANKING_OBJECTIVE_PREFIX:
+            if ranking_objective in model_objective:
+                return model_meta_schema.ModelObjective.RANKING
+        for regression_objective in cls._REGRESSION_OBJECTIVE_PREFIX:
+            if regression_objective in model_objective:
+                return model_meta_schema.ModelObjective.REGRESSION
+        return model_meta_schema.ModelObjective.UNKNOWN
 
     @classmethod
     def can_handle(
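For a raw `Booster`, the class checks above cannot apply, so the handler falls back to the objective string recorded in the model's own saved config. A small sketch on synthetic data; only the "binary:" prefix check is the point here:

    # The Booster fallback: the saved config JSON carries the training
    # objective (e.g. "binary:logistic"), which the prefix tables above
    # map to a ModelObjective value.
    import numpy as np
    import xgboost

    X = np.random.rand(20, 3)
    y = np.random.randint(0, 2, size=20)
    booster = xgboost.train(
        {"objective": "binary:logistic"}, xgboost.DMatrix(X, label=y), num_boost_round=2
    )

    config = booster.save_config()  # JSON string
    print("binary:" in config)      # True -> BINARY_CLASSIFICATION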
@@ -112,15 +148,30 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
             sample_input_data=sample_input_data,
             get_prediction_fn=get_prediction,
         )
+        model_objective = cls.get_model_objective(model)
+        model_meta.model_objective = model_objective
+        if kwargs.get("enable_explainability", True):
+            output_type = model_signature.DataType.DOUBLE
+            if model_objective == model_meta_schema.ModelObjective.MULTI_CLASSIFICATION:
+                output_type = model_signature.DataType.STRING
+            model_meta = handlers_utils.add_explain_method_signature(
+                model_meta=model_meta,
+                explain_method="explain",
+                target_method="predict",
+                output_return_type=output_type,
+            )
+            model_meta.function_properties = {
+                "explain": {model_meta_schema.FunctionProperties.PARTITIONED.value: False}
+            }
 
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
-        model.save_model(os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR))
+        model.save_model(os.path.join(model_blob_path, cls.MODEL_BLOB_FILE_OR_DIR))
         base_meta = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
-            path=cls.MODELE_BLOB_FILE_OR_DIR,
+            path=cls.MODEL_BLOB_FILE_OR_DIR,
             options=model_meta_schema.XgboostModelBlobOptions({"xgb_estimator_type": model.__class__.__name__}),
         )
         model_meta.models[name] = base_meta
@@ -133,6 +184,12 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
             ],
             check_local_version=True,
         )
+        if kwargs.get("enable_explainability", True):
+            model_meta.env.include_if_absent(
+                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
+                check_local_version=True,
+            )
+            model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
        model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)
 
     @classmethod
@@ -175,6 +232,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
         cls,
         raw_model: Union["xgboost.Booster", "xgboost.XGBModel"],
         model_meta: model_meta_api.ModelMetadata,
+        background_data: Optional[pd.DataFrame] = None,
         **kwargs: Unpack[model_types.XGBModelLoadOptions],
     ) -> custom_model.CustomModel:
         import xgboost
@@ -206,6 +264,16 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
 
                 return model_signature_utils.rename_pandas_df(df, signature.outputs)
 
+            @custom_model.inference_api
+            def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                import shap
+
+                explainer = shap.TreeExplainer(raw_model)
+                df = pd.DataFrame(explainer(X).values)
+                return model_signature_utils.rename_pandas_df(df, signature.outputs)
+
+            if target_method == "explain":
+                return explain_fn
             return fn
 
         type_method_dict: Dict[str, Any] = {"_raw_model": raw_model}
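Unlike the scikit-learn handler, the XGBoost path uses `shap.TreeExplainer`, which derives attributions from the tree structure itself and therefore needs no background frame. A minimal sketch on synthetic data:

    # TreeExplainer works directly off the boosted trees; no background data.
    import numpy as np
    import pandas as pd
    import shap
    import xgboost

    X = pd.DataFrame(np.random.rand(50, 4), columns=["f0", "f1", "f2", "f3"])
    y = np.random.randint(0, 2, size=50)
    model = xgboost.XGBClassifier(n_estimators=5).fit(X, y)

    values = shap.TreeExplainer(model)(X).values  # (n_rows, n_features) for binary
    print(pd.DataFrame(values, columns=X.columns).head())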
--- a/snowflake/ml/model/_packager/model_meta/model_meta.py
+++ b/snowflake/ml/model/_packager/model_meta/model_meta.py
@@ -237,6 +237,7 @@ class ModelMetadata:
         function_properties: A dict mapping function names to dict mapping function property key to value.
         metadata: User provided key-value metadata of the model. Defaults to None.
         creation_timestamp: Unix timestamp when the model metadata is created.
+        model_objective: Model objective like regression, classification etc.
     """
 
     def telemetry_metadata(self) -> ModelMetadataTelemetryDict:
@@ -260,6 +261,8 @@ class ModelMetadata:
         min_snowpark_ml_version: Optional[str] = None,
         models: Optional[Dict[str, model_blob_meta.ModelBlobMeta]] = None,
         original_metadata_version: Optional[str] = model_meta_schema.MODEL_METADATA_VERSION,
+        model_objective: Optional[model_meta_schema.ModelObjective] = model_meta_schema.ModelObjective.UNKNOWN,
+        explain_algorithm: Optional[model_meta_schema.ModelExplainAlgorithm] = None,
     ) -> None:
         self.name = name
         self.signatures: Dict[str, model_signature.ModelSignature] = dict()
@@ -284,6 +287,11 @@ class ModelMetadata:
 
         self.original_metadata_version = original_metadata_version
 
+        self.model_objective: model_meta_schema.ModelObjective = (
+            model_objective or model_meta_schema.ModelObjective.UNKNOWN
+        )
+        self.explain_algorithm: Optional[model_meta_schema.ModelExplainAlgorithm] = explain_algorithm
+
     @property
     def min_snowpark_ml_version(self) -> str:
         return self._min_snowpark_ml_version.base_version
@@ -321,9 +329,11 @@ class ModelMetadata:
         model_dict = model_meta_schema.ModelMetadataDict(
             {
                 "creation_timestamp": self.creation_timestamp,
-                "env": self.env.save_as_dict(pathlib.Path(model_dir_path)),
+                "env": self.env.save_as_dict(
+                    pathlib.Path(model_dir_path), default_channel_override=env_utils.SNOWFLAKE_CONDA_CHANNEL_URL
+                ),
                 "runtimes": {
-                    runtime_name: runtime.save(pathlib.Path(model_dir_path))
+                    runtime_name: runtime.save(pathlib.Path(model_dir_path), default_channel_override="conda-forge")
                     for runtime_name, runtime in self.runtimes.items()
                 },
                 "metadata": self.metadata,
@@ -333,6 +343,13 @@ class ModelMetadata:
                 "signatures": {func_name: sig.to_dict() for func_name, sig in self.signatures.items()},
                 "version": model_meta_schema.MODEL_METADATA_VERSION,
                 "min_snowpark_ml_version": self.min_snowpark_ml_version,
+                "model_objective": self.model_objective.value,
+                "explainability": (
+                    model_meta_schema.ExplainabilityMetadataDict(algorithm=self.explain_algorithm.value)
+                    if self.explain_algorithm
+                    else None
+                ),
+                "function_properties": self.function_properties,
             }
         )
 
@@ -370,6 +387,9 @@ class ModelMetadata:
             signatures=loaded_meta["signatures"],
             version=original_loaded_meta_version,
             min_snowpark_ml_version=loaded_meta_min_snowpark_ml_version,
+            model_objective=loaded_meta.get("model_objective", model_meta_schema.ModelObjective.UNKNOWN.value),
+            explainability=loaded_meta.get("explainability", None),
+            function_properties=loaded_meta.get("function_properties", {}),
         )
 
     @classmethod
@@ -406,6 +426,11 @@ class ModelMetadata:
         else:
             runtimes = None
 
+        explanation_algorithm_dict = model_dict.get("explainability", None)
+        explanation_algorithm = None
+        if explanation_algorithm_dict:
+            explanation_algorithm = model_meta_schema.ModelExplainAlgorithm(explanation_algorithm_dict["algorithm"])
+
         return cls(
             name=model_dict["name"],
             model_type=model_dict["model_type"],
@@ -417,4 +442,9 @@ class ModelMetadata:
             min_snowpark_ml_version=model_dict["min_snowpark_ml_version"],
             models=models,
             original_metadata_version=model_dict["version"],
+            model_objective=model_meta_schema.ModelObjective(
+                model_dict.get("model_objective", model_meta_schema.ModelObjective.UNKNOWN.value)
+            ),
+            explain_algorithm=explanation_algorithm,
+            function_properties=model_dict.get("function_properties", {}),
         )
--- a/snowflake/ml/model/_packager/model_meta/model_meta_schema.py
+++ b/snowflake/ml/model/_packager/model_meta/model_meta_schema.py
@@ -71,6 +71,10 @@ ModelBlobOptions = Union[
 ]
 
 
+class ExplainabilityMetadataDict(TypedDict):
+    algorithm: Required[str]
+
+
 class ModelBlobMetadataDict(TypedDict):
     name: Required[str]
     model_type: Required[type_hints.SupportedModelHandlerType]
@@ -92,3 +96,18 @@ class ModelMetadataDict(TypedDict):
     signatures: Required[Dict[str, Dict[str, Any]]]
     version: Required[str]
     min_snowpark_ml_version: Required[str]
+    model_objective: Required[str]
+    explainability: NotRequired[Optional[ExplainabilityMetadataDict]]
+    function_properties: NotRequired[Dict[str, Dict[str, Any]]]
+
+
+class ModelObjective(Enum):
+    UNKNOWN = "unknown"
+    BINARY_CLASSIFICATION = "binary_classification"
+    MULTI_CLASSIFICATION = "multi_classification"
+    REGRESSION = "regression"
+    RANKING = "ranking"
+
+
+class ModelExplainAlgorithm(Enum):
+    SHAP = "shap"
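Taken together, the new schema fields land in the serialized model metadata roughly as follows. This is an illustrative fragment, not output captured from the package:

    # Hypothetical excerpt of a saved XGBoost classifier's metadata dict under
    # the extended ModelMetadataDict schema; keys and values are illustrative.
    meta_fragment = {
        "model_objective": "binary_classification",   # a ModelObjective value
        "explainability": {"algorithm": "shap"},      # ExplainabilityMetadataDict
        "function_properties": {"explain": {"PARTITIONED": False}},
    }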
--- a/snowflake/ml/model/_packager/model_packager.py
+++ b/snowflake/ml/model/_packager/model_packager.py
@@ -146,7 +146,8 @@ class ModelPackager:
         m = handler.load_model(self.meta.name, self.meta, model_blobs_path, **options)
 
         if as_custom_model:
-            m = handler.convert_as_custom_model(m, self.meta, **options)
+            background_data = handler.load_background_data(self.meta.name, model_blobs_path)
+            m = handler.convert_as_custom_model(m, self.meta, background_data, **options)
             assert isinstance(m, custom_model.CustomModel)
 
         self.model = m
--- a/snowflake/ml/model/_packager/model_runtime/model_runtime.py
+++ b/snowflake/ml/model/_packager/model_runtime/model_runtime.py
@@ -35,7 +35,7 @@ class ModelRuntime:
         self,
         name: str,
         env: model_env.ModelEnv,
-        imports: Optional[List[pathlib.PurePosixPath]] = None,
+        imports: Optional[List[str]] = None,
         is_gpu: bool = False,
         loading_from_file: bool = False,
     ) -> None:
@@ -67,7 +67,9 @@ class ModelRuntime:
     def runtime_rel_path(self) -> pathlib.PurePosixPath:
         return pathlib.PurePosixPath(ModelRuntime.RUNTIME_DIR_REL_PATH) / self.name
 
-    def save(self, packager_path: pathlib.Path) -> model_meta_schema.ModelRuntimeDict:
+    def save(
+        self, packager_path: pathlib.Path, default_channel_override: str = env_utils.SNOWFLAKE_CONDA_CHANNEL_URL
+    ) -> model_meta_schema.ModelRuntimeDict:
         runtime_base_path = packager_path / self.runtime_rel_path
         runtime_base_path.mkdir(parents=True, exist_ok=True)
 
@@ -75,12 +77,12 @@ class ModelRuntime:
             snowpark_ml_lib_path = runtime_base_path / "snowflake-ml-python.zip"
             file_utils.zip_python_package(str(snowpark_ml_lib_path), "snowflake.ml")
             snowpark_ml_lib_rel_path = pathlib.PurePosixPath(snowpark_ml_lib_path.relative_to(packager_path).as_posix())
-            self.imports.append(snowpark_ml_lib_rel_path)
+            self.imports.append(str(snowpark_ml_lib_rel_path))
 
         self.runtime_env.conda_env_rel_path = self.runtime_rel_path / self.runtime_env.conda_env_rel_path
         self.runtime_env.pip_requirements_rel_path = self.runtime_rel_path / self.runtime_env.pip_requirements_rel_path
 
-        env_dict = self.runtime_env.save_as_dict(packager_path)
+        env_dict = self.runtime_env.save_as_dict(packager_path, default_channel_override=default_channel_override)
 
         return model_meta_schema.ModelRuntimeDict(
             imports=list(map(str, self.imports)),
@@ -108,6 +110,4 @@ class ModelRuntime:
             warnings.simplefilter("ignore")
             env.load_from_conda_file(packager_path / conda_env_rel_path)
             env.load_from_pip_file(packager_path / pip_requirements_rel_path)
-        return ModelRuntime(
-            name=name, env=env, imports=list(map(pathlib.PurePosixPath, loaded_dict["imports"])), loading_from_file=True
-        )
+        return ModelRuntime(name=name, env=env, imports=loaded_dict["imports"], loading_from_file=True)
--- a/snowflake/ml/model/model_signature.py
+++ b/snowflake/ml/model/model_signature.py
@@ -232,7 +232,7 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
                 ),
             )
         else:
-            if isinstance(data_col[0], list):
+            if isinstance(data_col.iloc[0], list):
                 if not ft_shape:
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
@@ -266,7 +266,7 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
                     ),
                 )
 
-            elif isinstance(data_col[0], np.ndarray):
+            elif isinstance(data_col.iloc[0], np.ndarray):
                 if not ft_shape:
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
@@ -297,7 +297,7 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
                     ),
                 )
 
-            elif isinstance(data_col[0], str):
+            elif isinstance(data_col.iloc[0], str):
                 if ft_shape is not None:
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
@@ -316,7 +316,7 @@ def _validate_pandas_df(data: pd.DataFrame, features: Sequence[core.BaseFeatureS
                     ),
                 )
 
-            elif isinstance(data_col[0], bytes):
+            elif isinstance(data_col.iloc[0], bytes):
                 if ft_shape is not None:
                     raise snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INVALID_DATA,
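All four hunks are the same fix: `data_col[0]` is a label lookup, which raises `KeyError` whenever the validated frame has a non-default index (e.g. after filtering or sampling), while `.iloc[0]` is positional and always reads the first row. A minimal reproduction, assuming only pandas:

    # Label vs. positional indexing: the reason for the .iloc[0] change.
    import pandas as pd

    col = pd.DataFrame({"a": [[1, 2], [3, 4]]}, index=[7, 9])["a"]
    print(col.iloc[0])   # [1, 2], positional and index-agnostic
    try:
        col[0]           # label lookup: there is no index label 0
    except KeyError:
        print("KeyError under a non-default index")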
--- a/snowflake/ml/model/type_hints.py
+++ b/snowflake/ml/model/type_hints.py
@@ -232,6 +232,8 @@ class BaseModelSaveOption(TypedDict):
     _legacy_save: NotRequired[bool]
     function_type: NotRequired[Literal["FUNCTION", "TABLE_FUNCTION"]]
     method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
+    include_pip_dependencies: NotRequired[bool]
+    enable_explainability: NotRequired[bool]
 
 
 class CatBoostModelSaveOptions(BaseModelSaveOption):
--- a/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py
+++ b/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py
@@ -41,7 +41,7 @@ cp.register_pickle_by_value(inspect.getmodule(snowpark_dataframe_utils.cast_snow
 
 _PROJECT = "ModelDevelopment"
 DEFAULT_UDTF_NJOBS = 3
-ENABLE_EFFICIENT_MEMORY_USAGE = False
+ENABLE_EFFICIENT_MEMORY_USAGE = True
 _UDTF_STAGE_NAME = f"MEMORY_EFFICIENT_UDTF_{str(uuid.uuid4()).replace('-', '_')}"
 
 
--- a/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py
+++ b/snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py
@@ -83,7 +83,19 @@ def _load_data_into_udf() -> Tuple[
     with open(local_fit_and_score_kwargs_file_path, mode="rb") as local_fit_and_score_kwargs_file_obj:
         fit_and_score_kwargs = cp.load(local_fit_and_score_kwargs_file_obj)
 
-    # convert dataframe to numpy would save memory consumption
+    # Convert dataframe to numpy would save memory consumption
+    # Except for Pipeline, we need to keep the dataframe for the column names
+    from sklearn.pipeline import Pipeline
+    if isinstance(base_estimator, Pipeline):
+        return (
+            df[CONSTANTS['input_cols']],
+            df[CONSTANTS['label_cols']].squeeze(),
+            indices,
+            params_to_evaluate,
+            base_estimator,
+            fit_and_score_kwargs,
+            CONSTANTS
+        )
     return (
         df[CONSTANTS['input_cols']].to_numpy(),
         df[CONSTANTS['label_cols']].squeeze().to_numpy(),
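The `Pipeline` carve-out exists because steps such as `ColumnTransformer` select input columns by name, which a numpy array no longer carries; converting to numpy would break fitting inside the UDTF. A small illustration with hypothetical column names:

    # Why Pipelines keep the DataFrame: name-based selection fails on ndarray.
    import pandas as pd
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    df = pd.DataFrame({"age": [20, 30, 40], "income": [1.0, 2.0, 3.0]})
    pipe = Pipeline([("ct", ColumnTransformer([("scale", StandardScaler(), ["age"])]))])

    pipe.fit_transform(df)               # fine: "age" resolved by column name
    try:
        pipe.fit_transform(df.to_numpy())
    except ValueError as e:              # ndarray has no column names to select by
        print("fails on ndarray:", e)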