PyPI - snowflake-ml-python - Versions diffs - 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

snowflake/ml/model/_model_composer/model_manifest/model_manifest.py CHANGED Viewed

@@ -1,19 +1,22 @@
 import collections
 import copy
 import pathlib
+import warnings
 from typing import List, Optional, cast
 import yaml
-from snowflake.ml._internal.lineage import data_source
+from snowflake.ml.data import data_source
 from snowflake.ml.model import type_hints
 from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
 from snowflake.ml.model._model_composer.model_method import (
     function_generator,
     model_method,
 )
-from snowflake.ml.model._packager.model_meta import model_meta as model_meta_api
-from snowflake.snowpark import Session
+from snowflake.ml.model._packager.model_meta import (
+    model_meta as model_meta_api,
+    model_meta_schema,
+)
 class ModelManifest:
@@ -33,9 +36,8 @@ class ModelManifest:
     def save(
         self,
-        session: Session,
         model_meta: model_meta_api.ModelMetadata,
-        model_file_rel_path: pathlib.PurePosixPath,
+        model_rel_path: pathlib.PurePosixPath,
         options: Optional[type_hints.ModelSaveOption] = None,
         data_sources: Optional[List[data_source.DataSource]] = None,
     ) -> None:
@@ -44,10 +46,10 @@ class ModelManifest:
         runtime_to_use = copy.deepcopy(model_meta.runtimes["cpu"])
         runtime_to_use.name = self._DEFAULT_RUNTIME_NAME
-        runtime_to_use.imports.append(model_file_rel_path)
+        runtime_to_use.imports.append(str(model_rel_path) + "/")
         runtime_dict = runtime_to_use.save(self.workspace_path)
-        self.function_generator = function_generator.FunctionGenerator(model_file_rel_path=model_file_rel_path)
+        self.function_generator = function_generator.FunctionGenerator(model_dir_rel_path=model_rel_path)
         self.methods: List[model_method.ModelMethod] = []
         for target_method in model_meta.signatures.keys():
             method = model_method.ModelMethod(
@@ -55,6 +57,9 @@ class ModelManifest:
                 target_method=target_method,
                 runtime_name=self._DEFAULT_RUNTIME_NAME,
                 function_generator=self.function_generator,
+                is_partitioned_function=model_meta.function_properties.get(target_method, {}).get(
+                    model_meta_schema.FunctionProperties.PARTITIONED.value, False
+                ),
                 options=model_method.get_model_method_options_from_options(options, target_method),
             )
@@ -69,6 +74,16 @@ class ModelManifest:
                 "In this case, set case_sensitive as True for those methods to distinguish them."
             )
+        dependencies = model_manifest_schema.ModelRuntimeDependenciesDict(conda=runtime_dict["dependencies"]["conda"])
+        if options.get("include_pip_dependencies"):
+            warnings.warn(
+                "`include_pip_dependencies` specified as True: pip dependencies will be included and may not"
+                "be warehouse-compabible. The model may need to be run in SPCS.",
+                category=UserWarning,
+                stacklevel=1,
+            )
+            dependencies["pip"] = runtime_dict["dependencies"]["pip"]
         manifest_dict = model_manifest_schema.ModelManifestDict(
             manifest_version=model_manifest_schema.MODEL_MANIFEST_VERSION,
             runtimes={
@@ -76,9 +91,7 @@ class ModelManifest:
                     language="PYTHON",
                     version=runtime_to_use.runtime_env.python_version,
                     imports=runtime_dict["imports"],
-                    dependencies=model_manifest_schema.ModelRuntimeDependenciesDict(
-                        conda=runtime_dict["dependencies"]["conda"]
-                    ),
+                    dependencies=dependencies,
                 )
             },
             methods=[
@@ -121,12 +134,13 @@ class ModelManifest:
         result = []
         if data_sources:
             for source in data_sources:
-                result.append(
-                    model_manifest_schema.LineageSourceDict(
-                        # Currently, we only support lineage from Dataset.
-                        type=model_manifest_schema.LineageSourceTypes.DATASET.value,
-                        entity=source.fully_qualified_name,
-                        version=source.version,
+                if isinstance(source, data_source.DatasetInfo):
+                    result.append(
+                        model_manifest_schema.LineageSourceDict(
+                            # Currently, we only support lineage from Dataset.
+                            type=model_manifest_schema.LineageSourceTypes.DATASET.value,
+                            entity=source.fully_qualified_name,
+                            version=source.version,
+                        )
                     )
-                )
         return result

snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py CHANGED Viewed

@@ -18,7 +18,8 @@ class ModelMethodFunctionTypes(enum.Enum):
 class ModelRuntimeDependenciesDict(TypedDict):
-    conda: Required[str]
+    conda: NotRequired[str]
+    pip: NotRequired[str]
 class ModelRuntimeDict(TypedDict):

snowflake/ml/model/_model_composer/model_method/function_generator.py CHANGED Viewed

@@ -3,7 +3,14 @@ from typing import Optional, TypedDict
 from typing_extensions import NotRequired
+from snowflake.ml._internal.exceptions import (
+    error_codes,
+    exceptions as snowml_exceptions,
+)
 from snowflake.ml.model import type_hints
+from snowflake.ml.model._model_composer.model_manifest.model_manifest_schema import (
+    ModelMethodFunctionTypes,
+)
 class FunctionGenerateOptions(TypedDict):
@@ -26,15 +33,16 @@ class FunctionGenerator:
     def __init__(
         self,
-        model_file_rel_path: pathlib.PurePosixPath,
+        model_dir_rel_path: pathlib.PurePosixPath,
     ) -> None:
-        self.model_file_rel_path = model_file_rel_path
+        self.model_dir_rel_path = model_dir_rel_path
     def generate(
         self,
         function_file_path: pathlib.Path,
         target_method: str,
         function_type: str,
+        is_partitioned_function: bool = False,
         options: Optional[FunctionGenerateOptions] = None,
     ) -> None:
         import importlib_resources
@@ -42,7 +50,15 @@ class FunctionGenerator:
         if options is None:
             options = {}
-        template_filename = f"infer_{function_type.lower()}.py_template"
+        if is_partitioned_function:
+            if function_type != ModelMethodFunctionTypes.TABLE_FUNCTION.value:
+                raise snowml_exceptions.SnowflakeMLException(
+                    error_code=error_codes.INVALID_DATA,
+                    original_exception=ValueError("Partitioned inference api functions must have type TABLE_FUNCTION."),
+                )
+            template_filename = "infer_partitioned.py_template"
+        else:
+            template_filename = f"infer_{function_type.lower()}.py_template"
         function_template = (
             importlib_resources.files("snowflake.ml.model._model_composer.model_method")
@@ -51,7 +67,7 @@ class FunctionGenerator:
         )
         udf_code = function_template.format(
-            model_file_name=self.model_file_rel_path.name,
+            model_dir_name=self.model_dir_rel_path.name,
             target_method=target_method,
             max_batch_size=options.get("max_batch_size", None),
             function_name=FunctionGenerator.FUNCTION_NAME,

snowflake/ml/model/_model_composer/model_method/infer_function.py_template CHANGED Viewed

@@ -1,12 +1,7 @@
-import fcntl
 import functools
 import inspect
 import os
 import sys
-import threading
-import zipfile
-from types import TracebackType
-from typing import Optional, Type
 import anyio
 import pandas as pd
@@ -15,42 +10,18 @@ from _snowflake import vectorized
 from snowflake.ml.model._packager import model_packager
-class FileLock:
-    def __enter__(self) -> None:
-        self._lock = threading.Lock()
-        self._lock.acquire()
-        self._fd = open("/tmp/lockfile.LOCK", "w+")
-        fcntl.lockf(self._fd, fcntl.LOCK_EX)
-    def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]
-    ) -> None:
-        self._fd.close()
-        self._lock.release()
 # User-defined parameters
-MODEL_FILE_NAME = "{model_file_name}"
+MODEL_DIR_REL_PATH = "{model_dir_name}"
 TARGET_METHOD = "{target_method}"
 MAX_BATCH_SIZE = {max_batch_size}
 # Retrieve the model
 IMPORT_DIRECTORY_NAME = "snowflake_import_directory"
 import_dir = sys._xoptions[IMPORT_DIRECTORY_NAME]
-model_dir_name = os.path.splitext(MODEL_FILE_NAME)[0]
-zip_model_path = os.path.join(import_dir, MODEL_FILE_NAME)
-extracted = "/tmp/models"
-extracted_model_dir_path = os.path.join(extracted, model_dir_name)
-with FileLock():
-    if not os.path.isdir(extracted_model_dir_path):
-        with zipfile.ZipFile(zip_model_path, "r") as myzip:
-            myzip.extractall(extracted_model_dir_path)
+model_dir_path = os.path.join(import_dir, MODEL_DIR_REL_PATH)
 # Load the model
-pk = model_packager.ModelPackager(extracted_model_dir_path)
+pk = model_packager.ModelPackager(model_dir_path)
 pk.load(as_custom_model=True)
 assert pk.model, "model is not loaded"
 assert pk.meta, "model metadata is not loaded"

snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template ADDED Viewed

@@ -0,0 +1,55 @@
+import fcntl
+import functools
+import inspect
+import os
+import sys
+import threading
+import zipfile
+from types import TracebackType
+from typing import Optional, Type
+import anyio
+import pandas as pd
+from _snowflake import vectorized
+from snowflake.ml.model._packager import model_packager
+# User-defined parameters
+MODEL_DIR_REL_PATH = "{model_dir_name}"
+TARGET_METHOD = "{target_method}"
+MAX_BATCH_SIZE = {max_batch_size}
+# Retrieve the model
+IMPORT_DIRECTORY_NAME = "snowflake_import_directory"
+import_dir = sys._xoptions[IMPORT_DIRECTORY_NAME]
+model_dir_path = os.path.join(import_dir, MODEL_DIR_REL_PATH)
+# Load the model
+pk = model_packager.ModelPackager(model_dir_path)
+pk.load(as_custom_model=True)
+assert pk.model, "model is not loaded"
+assert pk.meta, "model metadata is not loaded"
+# Determine the actual runner
+model = pk.model
+meta = pk.meta
+func = getattr(model, TARGET_METHOD)
+if inspect.iscoroutinefunction(func):
+    runner = functools.partial(anyio.run, func)
+else:
+    runner = functools.partial(func)
+# Determine preprocess parameters
+features = meta.signatures[TARGET_METHOD].inputs
+input_cols = [feature.name for feature in features]
+dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
+# Actual table function
+class {function_name}:
+    @vectorized(input=pd.DataFrame)
+    def end_partition(self, df: pd.DataFrame) -> pd.DataFrame:
+        df.columns = input_cols
+        input_df = df.astype(dtype=dtype_map)
+        return runner(input_df[input_cols])

snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template CHANGED Viewed

@@ -1,12 +1,7 @@
-import fcntl
 import functools
 import inspect
 import os
 import sys
-import threading
-import zipfile
-from types import TracebackType
-from typing import Optional, Type
 import anyio
 import pandas as pd
@@ -15,42 +10,18 @@ from _snowflake import vectorized
 from snowflake.ml.model._packager import model_packager
-class FileLock:
-    def __enter__(self) -> None:
-        self._lock = threading.Lock()
-        self._lock.acquire()
-        self._fd = open("/tmp/lockfile.LOCK", "w+")
-        fcntl.lockf(self._fd, fcntl.LOCK_EX)
-    def __exit__(
-        self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]
-    ) -> None:
-        self._fd.close()
-        self._lock.release()
 # User-defined parameters
-MODEL_FILE_NAME = "{model_file_name}"
+MODEL_DIR_REL_PATH = "{model_dir_name}"
 TARGET_METHOD = "{target_method}"
 MAX_BATCH_SIZE = {max_batch_size}
 # Retrieve the model
 IMPORT_DIRECTORY_NAME = "snowflake_import_directory"
 import_dir = sys._xoptions[IMPORT_DIRECTORY_NAME]
-model_dir_name = os.path.splitext(MODEL_FILE_NAME)[0]
-zip_model_path = os.path.join(import_dir, MODEL_FILE_NAME)
-extracted = "/tmp/models"
-extracted_model_dir_path = os.path.join(extracted, model_dir_name)
-with FileLock():
-    if not os.path.isdir(extracted_model_dir_path):
-        with zipfile.ZipFile(zip_model_path, "r") as myzip:
-            myzip.extractall(extracted_model_dir_path)
+model_dir_path = os.path.join(import_dir, MODEL_DIR_REL_PATH)
 # Load the model
-pk = model_packager.ModelPackager(extracted_model_dir_path)
+pk = model_packager.ModelPackager(model_dir_path)
 pk.load(as_custom_model=True)
 assert pk.model, "model is not loaded"
 assert pk.meta, "model metadata is not loaded"
@@ -72,8 +43,8 @@ dtype_map = {{feature.name: feature.as_dtype() for feature in features}}
 # Actual table function
 class {function_name}:
-    @vectorized(input=pd.DataFrame)
-    def end_partition(self, df: pd.DataFrame) -> pd.DataFrame:
+    @vectorized(input=pd.DataFrame, max_batch_size=MAX_BATCH_SIZE)
+    def process(self, df: pd.DataFrame) -> pd.DataFrame:
         df.columns = input_cols
         input_df = df.astype(dtype=dtype_map)
         return runner(input_df[input_cols])

snowflake/ml/model/_model_composer/model_method/model_method.py CHANGED Viewed

@@ -26,13 +26,14 @@ class ModelMethodOptions(TypedDict):
 def get_model_method_options_from_options(
     options: type_hints.ModelSaveOption, target_method: str
 ) -> ModelMethodOptions:
+    default_function_type = model_manifest_schema.ModelMethodFunctionTypes.FUNCTION.value
+    if options.get("enable_explainability", False) and target_method.startswith("explain"):
+        default_function_type = model_manifest_schema.ModelMethodFunctionTypes.TABLE_FUNCTION.value
     method_option = options.get("method_options", {}).get(target_method, {})
-    global_function_type = options.get("function_type", model_manifest_schema.ModelMethodFunctionTypes.FUNCTION.value)
+    global_function_type = options.get("function_type", default_function_type)
     function_type = method_option.get("function_type", global_function_type)
     if function_type not in [function_type.value for function_type in model_manifest_schema.ModelMethodFunctionTypes]:
-        raise NotImplementedError
-    # TODO(TH): enforce minimum snowflake version
+        raise NotImplementedError(f"Function type {function_type} is not supported.")
     return ModelMethodOptions(
         case_sensitive=method_option.get("case_sensitive", False),
@@ -47,10 +48,9 @@ class ModelMethod:
     Attributes:
         model_meta: Model Metadata.
         target_method: Original target method name to call with the model.
-        method_name: The actual method name registered in manifest and used in SQL.
-        function_generator: Function file generator.
         runtime_name: Name of the Model Runtime to run the method.
+        function_generator: Function file generator.
+        is_partitioned_function:  Whether the model method function is partitioned.
         options: Model Method Options.
     """
@@ -63,11 +63,13 @@ class ModelMethod:
         target_method: str,
         runtime_name: str,
         function_generator: function_generator.FunctionGenerator,
+        is_partitioned_function: bool = False,
         options: Optional[ModelMethodOptions] = None,
     ) -> None:
         self.model_meta = model_meta
         self.target_method = target_method
         self.function_generator = function_generator
+        self.is_partitioned_function = is_partitioned_function
         self.runtime_name = runtime_name
         self.options = options or {}
         try:
@@ -111,6 +113,7 @@ class ModelMethod:
             workspace_path / ModelMethod.FUNCTIONS_DIR_REL_PATH / f"{self.target_method}.py",
             self.target_method,
             self.function_type,
+            self.is_partitioned_function,
             options=options,
         )
         input_list = [

snowflake/ml/model/_packager/model_handlers/_base.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from abc import abstractmethod
+from enum import Enum
 from typing import Dict, Generic, Optional, Protocol, Type, final
 from typing_extensions import TypeGuard, Unpack
@@ -8,6 +9,15 @@ from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import model_meta
+class ModelObjective(Enum):
+    # This is not getting stored anywhere as metadata yet so it should be fine to slowly extend it for better coverage
+    UNKNOWN = "unknown"
+    BINARY_CLASSIFICATION = "binary_classification"
+    MULTI_CLASSIFICATION = "multi_classification"
+    REGRESSION = "regression"
+    RANKING = "ranking"
 class _BaseModelHandlerProtocol(Protocol[model_types._ModelType]):
     HANDLER_TYPE: model_types.SupportedModelHandlerType
     HANDLER_VERSION: str
@@ -16,7 +26,7 @@ class _BaseModelHandlerProtocol(Protocol[model_types._ModelType]):
     @classmethod
     @abstractmethod
-    def can_handle(cls, model: model_types.SupportedDataType) -> TypeGuard[model_types._ModelType]:
+    def can_handle(cls, model: model_types.SupportedModelType) -> TypeGuard[model_types._ModelType]:
         """Whether this handler could support the type of the `model`.
         Args:
@@ -75,7 +85,7 @@ class _BaseModelHandlerProtocol(Protocol[model_types._ModelType]):
         name: str,
         model_meta: model_meta.ModelMetadata,
         model_blobs_dir_path: str,
-        **kwargs: Unpack[model_types.ModelLoadOption],
+        **kwargs: Unpack[model_types.BaseModelLoadOption],
     ) -> model_types._ModelType:
         """Load the model into memory.
@@ -96,7 +106,7 @@ class _BaseModelHandlerProtocol(Protocol[model_types._ModelType]):
         cls,
         raw_model: model_types._ModelType,
         model_meta: model_meta.ModelMetadata,
-        **kwargs: Unpack[model_types.ModelLoadOption],
+        **kwargs: Unpack[model_types.BaseModelLoadOption],
     ) -> custom_model.CustomModel:
         """Create a custom model class wrap for unified interface when being deployed. The predict method will be
         re-targeted based on target_method metadata.

snowflake/ml/model/_packager/model_handlers/_utils.py CHANGED Viewed

@@ -1,4 +1,9 @@
-from typing import Callable, Iterable, Optional, Sequence, cast
+import json
+from typing import Any, Callable, Iterable, Optional, Sequence, cast
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
 from snowflake.ml.model import model_signature, type_hints as model_types
 from snowflake.ml.model._packager.model_meta import model_meta
@@ -36,6 +41,25 @@ def validate_signature(
         predictions_df = get_prediction_fn(target_method, local_sample_input)
         sig = model_signature.infer_signature(local_sample_input, predictions_df)
         model_meta.signatures[target_method] = sig
+    return model_meta
+def add_explain_method_signature(
+    model_meta: model_meta.ModelMetadata,
+    explain_method: str,
+    target_method: str,
+    output_return_type: model_signature.DataType = model_signature.DataType.DOUBLE,
+) -> model_meta.ModelMetadata:
+    if target_method not in model_meta.signatures:
+        raise ValueError(f"Signature for target method {target_method} is missing")
+    inputs = model_meta.signatures[target_method].inputs
+    model_meta.signatures[explain_method] = model_signature.ModelSignature(
+        inputs=inputs,
+        outputs=[
+            model_signature.FeatureSpec(dtype=output_return_type, name=f"{spec.name}_explanation") for spec in inputs
+        ],
+    )
     return model_meta
@@ -55,3 +79,37 @@ def validate_target_methods(model: model_types.SupportedModelType, target_method
     for method_name in target_methods:
         if not _is_callable(model, method_name):
             raise ValueError(f"Target method {method_name} is not callable or does not exist in the model.")
+def get_num_classes_if_exists(model: model_types.SupportedModelType) -> int:
+    num_classes = getattr(model, "classes_", [])
+    return len(num_classes)
+def convert_explanations_to_2D_df(
+    model: model_types.SupportedModelType, explanations: npt.NDArray[Any]
+) -> pd.DataFrame:
+    if explanations.ndim != 3:
+        return pd.DataFrame(explanations)
+    if hasattr(model, "classes_"):
+        classes_list = [cl for cl in model.classes_]  # type:ignore[union-attr]
+        len_classes = len(classes_list)
+        if explanations.shape[2] != len_classes:
+            raise ValueError(f"Model has {len_classes} classes but explanations have {explanations.shape[2]}")
+    else:
+        classes_list = [i for i in range(explanations.shape[2])]
+    exp_2d = []
+    # TODO (SNOW-1549044): Optimize this
+    for row in explanations:
+        col_list = []
+        for column in row:
+            class_explanations = {}
+            for cl, cl_exp in zip(classes_list, column):
+                if isinstance(cl, (int, np.integer)):
+                    cl = int(cl)
+                class_explanations[cl] = cl_exp
+            col_list.append(json.dumps(class_explanations))
+        exp_2d.append(col_list)
+    return pd.DataFrame(exp_2d)

snowflake/ml/model/_packager/model_handlers/catboost.py CHANGED Viewed

@@ -33,6 +33,22 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
     MODELE_BLOB_FILE_OR_DIR = "model.bin"
     DEFAULT_TARGET_METHODS = ["predict", "predict_proba"]
+    @classmethod
+    def get_model_objective(cls, model: "catboost.CatBoost") -> _base.ModelObjective:
+        import catboost
+        if isinstance(model, catboost.CatBoostClassifier):
+            num_classes = handlers_utils.get_num_classes_if_exists(model)
+            if num_classes == 2:
+                return _base.ModelObjective.BINARY_CLASSIFICATION
+            return _base.ModelObjective.MULTI_CLASSIFICATION
+        if isinstance(model, catboost.CatBoostRanker):
+            return _base.ModelObjective.RANKING
+        if isinstance(model, catboost.CatBoostRegressor):
+            return _base.ModelObjective.REGRESSION
+        # TODO: Find out model type from the generic Catboost Model
+        return _base.ModelObjective.UNKNOWN
     @classmethod
     def can_handle(cls, model: model_types.SupportedModelType) -> TypeGuard["catboost.CatBoost"]:
         return (type_utils.LazyType("catboost.CatBoost").isinstance(model)) and any(
@@ -89,6 +105,16 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
                 sample_input_data=sample_input_data,
                 get_prediction_fn=get_prediction,
             )
+            if kwargs.get("enable_explainability", False):
+                output_type = model_signature.DataType.DOUBLE
+                if cls.get_model_objective(model) == _base.ModelObjective.MULTI_CLASSIFICATION:
+                    output_type = model_signature.DataType.STRING
+                model_meta = handlers_utils.add_explain_method_signature(
+                    model_meta=model_meta,
+                    explain_method="explain",
+                    target_method="predict",
+                    output_return_type=output_type,
+                )
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
@@ -112,6 +138,11 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
             ],
             check_local_version=True,
         )
+        if kwargs.get("enable_explainability", False):
+            model_meta.env.include_if_absent(
+                [model_env.ModelDependency(requirement="shap", pip_name="shap")],
+                check_local_version=True,
+            )
         model_meta.env.cuda_version = kwargs.get("cuda_version", model_env.DEFAULT_CUDA_VERSION)
         return None
@@ -122,7 +153,7 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
         name: str,
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        **kwargs: Unpack[model_types.ModelLoadOption],
+        **kwargs: Unpack[model_types.CatBoostModelLoadOptions],
     ) -> "catboost.CatBoost":
         import catboost
@@ -157,7 +188,7 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
         cls,
         raw_model: "catboost.CatBoost",
         model_meta: model_meta_api.ModelMetadata,
-        **kwargs: Unpack[model_types.ModelLoadOption],
+        **kwargs: Unpack[model_types.CatBoostModelLoadOptions],
     ) -> custom_model.CustomModel:
         import catboost
@@ -186,6 +217,17 @@ class CatBoostModelHandler(_base.BaseModelHandler["catboost.CatBoost"]):
                     return model_signature_utils.rename_pandas_df(df, signature.outputs)
+                @custom_model.inference_api
+                def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                    import shap
+                    explainer = shap.TreeExplainer(raw_model)
+                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X).values)
+                    return model_signature_utils.rename_pandas_df(df, signature.outputs)
+                if target_method == "explain":
+                    return explain_fn
                 return fn
             type_method_dict: Dict[str, Any] = {"_raw_model": raw_model}

snowflake/ml/model/_packager/model_handlers/custom.py CHANGED Viewed

@@ -17,6 +17,7 @@ from snowflake.ml.model._packager.model_handlers_migrator import base_migrator
 from snowflake.ml.model._packager.model_meta import (
     model_blob_meta,
     model_meta as model_meta_api,
+    model_meta_schema,
 )
@@ -68,6 +69,11 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
                 predictions_df = target_method(model, sample_input_data)
             return predictions_df
+        for func_name in model._get_partitioned_infer_methods():
+            function_properties = model_meta.function_properties.get(func_name, {})
+            function_properties[model_meta_schema.FunctionProperties.PARTITIONED.value] = True
+            model_meta.function_properties[func_name] = function_properties
         if not is_sub_model:
             model_meta = handlers_utils.validate_signature(
                 model=model,
@@ -101,14 +107,16 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
         # Make sure that the module where the model is defined get pickled by value as well.
         cloudpickle.register_pickle_by_value(sys.modules[model.__module__])
-        picked_obj = (model.__class__, model.context)
+        pickled_obj = (model.__class__, model.context)
         with open(os.path.join(model_blob_path, cls.MODELE_BLOB_FILE_OR_DIR), "wb") as f:
-            cloudpickle.dump(picked_obj, f)
+            cloudpickle.dump(pickled_obj, f)
+        # model meta will be saved by the context manager
         model_meta.models[name] = model_blob_meta.ModelBlobMeta(
             name=name,
             model_type=cls.HANDLER_TYPE,
             path=cls.MODELE_BLOB_FILE_OR_DIR,
             handler_version=cls.HANDLER_VERSION,
+            function_properties=model_meta.function_properties,
             artifacts={
                 name: pathlib.Path(
                     os.path.join(cls.MODEL_ARTIFACTS_DIR, os.path.basename(os.path.normpath(path=uri)))
@@ -128,7 +136,7 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
         name: str,
         model_meta: model_meta_api.ModelMetadata,
         model_blobs_dir_path: str,
-        **kwargs: Unpack[model_types.ModelLoadOption],
+        **kwargs: Unpack[model_types.CustomModelLoadOption],
     ) -> "custom_model.CustomModel":
         model_blob_path = os.path.join(model_blobs_dir_path, name)
@@ -175,6 +183,6 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
         cls,
         raw_model: custom_model.CustomModel,
         model_meta: model_meta_api.ModelMetadata,
-        **kwargs: Unpack[model_types.ModelLoadOption],
+        **kwargs: Unpack[model_types.CustomModelLoadOption],
     ) -> custom_model.CustomModel:
         return raw_model

snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl

snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl