snowflake-ml-python 1.8.3__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. snowflake/cortex/__init__.py +7 -1
  2. snowflake/ml/_internal/platform_capabilities.py +13 -11
  3. snowflake/ml/_internal/utils/identifier.py +2 -2
  4. snowflake/ml/jobs/_utils/constants.py +1 -1
  5. snowflake/ml/jobs/_utils/payload_utils.py +39 -30
  6. snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +4 -4
  7. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +1 -1
  8. snowflake/ml/jobs/_utils/spec_utils.py +1 -1
  9. snowflake/ml/jobs/decorators.py +6 -0
  10. snowflake/ml/jobs/job.py +63 -16
  11. snowflake/ml/jobs/manager.py +50 -16
  12. snowflake/ml/model/_client/model/model_version_impl.py +1 -1
  13. snowflake/ml/model/_client/ops/service_ops.py +26 -14
  14. snowflake/ml/model/_client/service/model_deployment_spec.py +340 -170
  15. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +25 -0
  16. snowflake/ml/model/_client/sql/service.py +4 -13
  17. snowflake/ml/model/_model_composer/model_composer.py +41 -18
  18. snowflake/ml/model/_packager/model_handlers/_utils.py +32 -2
  19. snowflake/ml/model/_packager/model_handlers/custom.py +1 -1
  20. snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -2
  21. snowflake/ml/model/_packager/model_handlers/sklearn.py +100 -41
  22. snowflake/ml/model/_packager/model_handlers/tensorflow.py +7 -4
  23. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  24. snowflake/ml/model/_packager/model_handlers/xgboost.py +16 -7
  25. snowflake/ml/model/_packager/model_meta/model_meta.py +2 -1
  26. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -0
  27. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +4 -4
  28. snowflake/ml/model/_signatures/dmatrix_handler.py +15 -2
  29. snowflake/ml/model/custom_model.py +17 -4
  30. snowflake/ml/model/model_signature.py +3 -3
  31. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -1
  32. snowflake/ml/modeling/cluster/affinity_propagation.py +9 -1
  33. snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -1
  34. snowflake/ml/modeling/cluster/birch.py +9 -1
  35. snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -1
  36. snowflake/ml/modeling/cluster/dbscan.py +9 -1
  37. snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -1
  38. snowflake/ml/modeling/cluster/k_means.py +9 -1
  39. snowflake/ml/modeling/cluster/mean_shift.py +9 -1
  40. snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -1
  41. snowflake/ml/modeling/cluster/optics.py +9 -1
  42. snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -1
  43. snowflake/ml/modeling/cluster/spectral_clustering.py +9 -1
  44. snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -1
  45. snowflake/ml/modeling/compose/column_transformer.py +9 -1
  46. snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -1
  47. snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -1
  48. snowflake/ml/modeling/covariance/empirical_covariance.py +9 -1
  49. snowflake/ml/modeling/covariance/graphical_lasso.py +9 -1
  50. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -1
  51. snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -1
  52. snowflake/ml/modeling/covariance/min_cov_det.py +9 -1
  53. snowflake/ml/modeling/covariance/oas.py +9 -1
  54. snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -1
  55. snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -1
  56. snowflake/ml/modeling/decomposition/factor_analysis.py +9 -1
  57. snowflake/ml/modeling/decomposition/fast_ica.py +9 -1
  58. snowflake/ml/modeling/decomposition/incremental_pca.py +9 -1
  59. snowflake/ml/modeling/decomposition/kernel_pca.py +9 -1
  60. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -1
  61. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -1
  62. snowflake/ml/modeling/decomposition/pca.py +9 -1
  63. snowflake/ml/modeling/decomposition/sparse_pca.py +9 -1
  64. snowflake/ml/modeling/decomposition/truncated_svd.py +9 -1
  65. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -1
  66. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -1
  67. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -1
  68. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -1
  69. snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -1
  70. snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -1
  71. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -1
  72. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -1
  73. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -1
  74. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -1
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -1
  76. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -1
  77. snowflake/ml/modeling/ensemble/isolation_forest.py +9 -1
  78. snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -1
  79. snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -1
  80. snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -1
  81. snowflake/ml/modeling/ensemble/voting_classifier.py +9 -1
  82. snowflake/ml/modeling/ensemble/voting_regressor.py +9 -1
  83. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -1
  84. snowflake/ml/modeling/feature_selection/select_fdr.py +9 -1
  85. snowflake/ml/modeling/feature_selection/select_fpr.py +9 -1
  86. snowflake/ml/modeling/feature_selection/select_fwe.py +9 -1
  87. snowflake/ml/modeling/feature_selection/select_k_best.py +9 -1
  88. snowflake/ml/modeling/feature_selection/select_percentile.py +9 -1
  89. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -1
  90. snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -1
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -1
  92. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -1
  93. snowflake/ml/modeling/impute/iterative_imputer.py +9 -1
  94. snowflake/ml/modeling/impute/knn_imputer.py +9 -1
  95. snowflake/ml/modeling/impute/missing_indicator.py +9 -1
  96. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -1
  97. snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -1
  98. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -1
  99. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -1
  100. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -1
  101. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -1
  102. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -1
  103. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -1
  104. snowflake/ml/modeling/linear_model/ard_regression.py +9 -1
  105. snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -1
  106. snowflake/ml/modeling/linear_model/elastic_net.py +9 -1
  107. snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -1
  108. snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -1
  109. snowflake/ml/modeling/linear_model/huber_regressor.py +9 -1
  110. snowflake/ml/modeling/linear_model/lars.py +9 -1
  111. snowflake/ml/modeling/linear_model/lars_cv.py +9 -1
  112. snowflake/ml/modeling/linear_model/lasso.py +9 -1
  113. snowflake/ml/modeling/linear_model/lasso_cv.py +9 -1
  114. snowflake/ml/modeling/linear_model/lasso_lars.py +9 -1
  115. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -1
  116. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -1
  117. snowflake/ml/modeling/linear_model/linear_regression.py +9 -1
  118. snowflake/ml/modeling/linear_model/logistic_regression.py +9 -1
  119. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -1
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -1
  121. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -1
  122. snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -1
  123. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -1
  124. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -1
  125. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -1
  126. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -1
  127. snowflake/ml/modeling/linear_model/perceptron.py +9 -1
  128. snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -1
  129. snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -1
  130. snowflake/ml/modeling/linear_model/ridge.py +9 -1
  131. snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -1
  132. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -1
  133. snowflake/ml/modeling/linear_model/ridge_cv.py +9 -1
  134. snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -1
  135. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -1
  136. snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -1
  137. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -1
  138. snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -1
  139. snowflake/ml/modeling/manifold/isomap.py +9 -1
  140. snowflake/ml/modeling/manifold/mds.py +9 -1
  141. snowflake/ml/modeling/manifold/spectral_embedding.py +9 -1
  142. snowflake/ml/modeling/manifold/tsne.py +9 -1
  143. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -1
  144. snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -1
  145. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -1
  146. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -1
  147. snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -1
  148. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -1
  149. snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -1
  150. snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -1
  151. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -1
  152. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -1
  153. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -1
  154. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -1
  155. snowflake/ml/modeling/neighbors/kernel_density.py +9 -1
  156. snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -1
  157. snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -1
  158. snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -1
  159. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -1
  160. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -1
  161. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -1
  162. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -1
  163. snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -1
  164. snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -1
  165. snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -1
  166. snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -1
  167. snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -1
  168. snowflake/ml/modeling/svm/linear_svc.py +9 -1
  169. snowflake/ml/modeling/svm/linear_svr.py +9 -1
  170. snowflake/ml/modeling/svm/nu_svc.py +9 -1
  171. snowflake/ml/modeling/svm/nu_svr.py +9 -1
  172. snowflake/ml/modeling/svm/svc.py +9 -1
  173. snowflake/ml/modeling/svm/svr.py +9 -1
  174. snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -1
  175. snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -1
  176. snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -1
  177. snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -1
  178. snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -1
  179. snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -1
  180. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -1
  181. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -1
  182. snowflake/ml/monitoring/explain_visualize.py +286 -0
  183. snowflake/ml/registry/_manager/model_manager.py +23 -2
  184. snowflake/ml/registry/registry.py +10 -9
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.4.dist-info}/METADATA +40 -8
  187. {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.4.dist-info}/RECORD +190 -189
  188. {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.4.dist-info}/WHEEL +1 -1
  189. {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.4.dist-info}/licenses/LICENSE.txt +0 -0
  190. {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.4.dist-info}/top_level.txt +0 -0

snowflake/ml/model/_model_composer/model_composer.py

@@ -142,28 +142,51 @@ class ModelComposer:
         conda_dep_dict = env_utils.validate_conda_dependency_string_list(
             conda_dependencies if conda_dependencies else []
         )
-        is_warehouse_runnable = (
-            not conda_dep_dict
-            or all(
-                chan == env_utils.DEFAULT_CHANNEL_NAME or chan == env_utils.SNOWFLAKE_CONDA_CHANNEL_URL
-                for chan in conda_dep_dict
-            )
-        ) and (not pip_requirements)
-        disable_explainability = (
-            target_platforms and model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES in target_platforms
-        ) or (not is_warehouse_runnable)
-
-        if disable_explainability and options and options.get("enable_explainability", False):
-            warnings.warn(
-                ("The model can be deployed to Snowpark Container Services only if `enable_explainability=False`."),
-                category=UserWarning,
-                stacklevel=2,
+
+        enable_explainability = None
+
+        if options:
+            enable_explainability = options.get("enable_explainability", None)
+
+        # skip everything if user said False explicitly
+        if enable_explainability is None or enable_explainability is True:
+            is_warehouse_runnable = (
+                not conda_dep_dict
+                or all(
+                    chan == env_utils.DEFAULT_CHANNEL_NAME or chan == env_utils.SNOWFLAKE_CONDA_CHANNEL_URL
+                    for chan in conda_dep_dict
+                )
+            ) and (not pip_requirements)
+
+            only_spcs = (
+                target_platforms
+                and len(target_platforms) == 1
+                and model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES in target_platforms
             )
+            if only_spcs or (not is_warehouse_runnable):
+                # if only SPCS and user asked for explainability we fail
+                if enable_explainability is True:
+                    raise ValueError(
+                        "`enable_explainability` cannot be set to True when the model is not runnable in WH "
+                        "or the target platforms include SPCS."
+                    )
+                elif not options:  # explicitly set flag to false in these cases if not specified
+                    options = model_types.BaseModelSaveOption()
+                    options["enable_explainability"] = False
+            elif (
+                target_platforms
+                and len(target_platforms) > 1
+                and model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES in target_platforms
+            ):  # if both then only available for WH
+                if enable_explainability is True:
+                    warnings.warn(
+                        ("Explain function will only be available for model deployed to warehouse."),
+                        category=UserWarning,
+                        stacklevel=2,
+                    )
 
         if not options:
             options = model_types.BaseModelSaveOption()
-        if disable_explainability:
-            options["enable_explainability"] = False
 
         if not snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
             snowml_matched_versions = env_utils.get_matched_package_versions_in_information_schema(
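
Net effect of this hunk: requesting explanations for a model that cannot run in a warehouse is now a hard error instead of a warning, and the flag is only defaulted to False when the user left it unset. A minimal sketch of the new log-time behavior, assuming an existing Registry instance `reg` and a fitted model `clf` (both names illustrative):

    from snowflake.ml.model import type_hints as model_types

    # SPCS-only deployment plus explicit explainability now raises in 1.8.4.
    try:
        reg.log_model(
            clf,
            model_name="my_model",
            version_name="v1",
            target_platforms=[model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES],
            options={"enable_explainability": True},
        )
    except ValueError as e:
        print(e)  # "`enable_explainability` cannot be set to True when ..."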

snowflake/ml/model/_packager/model_handlers/_utils.py

@@ -109,6 +109,35 @@ def get_input_signature(
     return input_sig
 
 
+def add_inferred_explain_method_signature(
+    model_meta: model_meta.ModelMetadata,
+    explain_method: str,
+    target_method: str,
+    background_data: model_types.SupportedDataType,
+    explain_fn: Callable[[model_types.SupportedLocalDataType], model_types.SupportedLocalDataType],
+    output_feature_names: Optional[Sequence[str]] = None,
+) -> model_meta.ModelMetadata:
+    inputs = get_input_signature(model_meta, target_method)
+    if output_feature_names is None:  # If not provided, assume output feature names are the same as input feature names
+        output_feature_names = [spec.name for spec in inputs]
+
+    if model_meta.model_type == "snowml":
+        suffixed_output_names = [identifier.concat_names([name, "_explanation"]) for name in output_feature_names]
+    else:
+        suffixed_output_names = [f"{name}_explanation" for name in output_feature_names]
+
+    truncated_background_data = get_truncated_sample_data(background_data, 5)
+    sig = model_signature.infer_signature(
+        input_data=truncated_background_data,
+        output_data=explain_fn(truncated_background_data),
+        input_feature_names=[spec.name for spec in inputs],
+        output_feature_names=suffixed_output_names,
+    )
+
+    model_meta.signatures[explain_method] = sig
+    return model_meta
+
+
 def add_explain_method_signature(
     model_meta: model_meta.ModelMetadata,
     explain_method: str,
@@ -236,8 +265,9 @@ def validate_model_task(passed_model_task: model_types.Task, inferred_model_task
 def get_explain_target_method(
     model_metadata: model_meta.ModelMetadata, target_methods_list: list[str]
 ) -> Optional[str]:
-    for method in model_metadata.signatures.keys():
-        if method in target_methods_list:
+    """Returns the first target method that is found in the model metadata signatures."""
+    for method in target_methods_list:
+        if method in model_metadata.signatures.keys():
             return method
     return None
 
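
The reordered loop makes `target_methods_list` the priority order, rather than whatever insertion order the signatures dict happens to have. A tiny self-contained illustration of the difference:

    signatures = {"predict": ..., "predict_proba": ...}  # dict order: predict first
    priority = ["predict_proba", "predict"]              # caller's preference

    old = next((m for m in signatures if m in priority), None)   # -> "predict"
    new = next((m for m in priority if m in signatures), None)   # -> "predict_proba"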

snowflake/ml/model/_packager/model_handlers/custom.py

@@ -72,7 +72,7 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
             predictions_df = target_method(model, sample_input_data)
             return predictions_df
 
-        for func_name in model._get_partitioned_infer_methods():
+        for func_name in model._get_partitioned_methods():
             function_properties = model_meta.function_properties.get(func_name, {})
             function_properties[model_meta_schema.FunctionProperties.PARTITIONED.value] = True
             model_meta.function_properties[func_name] = function_properties

snowflake/ml/model/_packager/model_handlers/pytorch.py

@@ -82,6 +82,7 @@ class PyTorchHandler(_base.BaseModelHandler["torch.nn.Module"]):
         enable_explainability = kwargs.get("enable_explainability", False)
         if enable_explainability:
             raise NotImplementedError("Explainability is not supported for PyTorch model.")
+        multiple_inputs = kwargs.get("multiple_inputs", False)
 
         import torch
 
@@ -94,8 +95,6 @@ class PyTorchHandler(_base.BaseModelHandler["torch.nn.Module"]):
             default_target_methods=cls.DEFAULT_TARGET_METHODS,
         )
 
-        multiple_inputs = kwargs.get("multiple_inputs", False)
-
         def get_prediction(
             target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
         ) -> model_types.SupportedLocalDataType:

snowflake/ml/model/_packager/model_handlers/sklearn.py

@@ -1,6 +1,6 @@
 import os
 import warnings
-from typing import TYPE_CHECKING, Callable, Optional, Union, cast, final
+from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union, cast, final
 
 import cloudpickle
 import numpy as np
@@ -38,6 +38,35 @@ def _unpack_container_runtime_pipeline(model: "sklearn.pipeline.Pipeline") -> "s
     return model
 
 
+def _apply_transforms_up_to_last_step(
+    model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
+    data: model_types.SupportedDataType,
+    input_feature_names: Optional[list[str]] = None,
+) -> pd.DataFrame:
+    """Apply all transformations in the sklearn pipeline model up to the last step."""
+    transformed_data = data
+    output_features_names = input_feature_names
+
+    if type_utils.LazyType("sklearn.pipeline.Pipeline").isinstance(model):
+        for step_name, step in model.steps[:-1]:  # type: ignore[attr-defined]
+            if not hasattr(step, "transform"):
+                raise ValueError(f"Step '{step_name}' does not have a 'transform' method.")
+            transformed_data = step.transform(transformed_data)
+            if output_features_names is None:
+                continue
+            elif hasattr(step, "get_feature_names_out"):
+                output_features_names = step.get_feature_names_out(output_features_names)
+            else:
+                raise ValueError(
+                    f"Step '{step_name}' in the pipeline does not have a 'get_feature_names_out' method. "
+                    "Feature names cannot be propagated."
+                )
+    if type_utils.LazyType("scipy.sparse.csr_matrix").isinstance(transformed_data):
+        # Convert to dense array if it's a sparse matrix
+        transformed_data = transformed_data.toarray()  # type: ignore[attr-defined]
+    return pd.DataFrame(transformed_data, columns=output_features_names)
+
+
 @final
 class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):
     """Handler for scikit-learn based model.
@@ -58,7 +87,9 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
         "decision_function",
         "score_samples",
     ]
-    EXPLAIN_TARGET_METHODS = ["predict", "predict_proba", "predict_log_proba"]
+
+    # Prioritize predict_proba as it gives multi-class probabilities
+    EXPLAIN_TARGET_METHODS = ["predict_proba", "predict", "predict_log_proba"]
 
     @classmethod
     def can_handle(
@@ -160,17 +191,38 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                     stacklevel=1,
                 )
                 enable_explainability = False
-            elif model_meta.task == model_types.Task.UNKNOWN or explain_target_method is None:
+            elif model_meta.task == model_types.Task.UNKNOWN:
+                enable_explainability = False
+            elif explain_target_method is None:
                 enable_explainability = False
             else:
                 enable_explainability = True
             if enable_explainability:
-                model_meta = handlers_utils.add_explain_method_signature(
-                    model_meta=model_meta,
-                    explain_method="explain",
-                    target_method=explain_target_method,
-                    output_return_type=model_task_and_output_type.output_type,
+                explain_target_method = str(explain_target_method)  # mypy complains if we don't cast to str here
+
+                input_signature = handlers_utils.get_input_signature(model_meta, explain_target_method)
+                transformed_background_data = _apply_transforms_up_to_last_step(
+                    model=model,
+                    data=background_data,
+                    input_feature_names=[spec.name for spec in input_signature],
                 )
+
+                try:
+                    model_meta = handlers_utils.add_inferred_explain_method_signature(
+                        model_meta=model_meta,
+                        explain_method="explain",
+                        target_method=explain_target_method,
+                        background_data=background_data,
+                        explain_fn=cls._build_explain_fn(model, background_data, input_signature),
+                        output_feature_names=transformed_background_data.columns,
+                    )
+                except ValueError:
+                    if kwargs.get("enable_explainability", None):
+                        # user explicitly enabled explainability, so we should raise the error
+                        raise ValueError(
+                            "Explainability for this model is not supported. Please set `enable_explainability=False`"
+                        )
+
                 handlers_utils.save_background_data(
                     model_blobs_dir_path,
                     cls.EXPLAIN_ARTIFACTS_DIR,
@@ -222,11 +274,13 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
             )
 
         if enable_explainability:
-            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap>=0.46.0", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP
 
         model_meta.env.include_if_absent(
-            [model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn")],
+            [
+                model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn"),
+            ],
             check_local_version=True,
         )
 
@@ -286,37 +340,8 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
 
         @custom_model.inference_api
         def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
-            import shap
-
-            try:
-                explainer = shap.Explainer(raw_model, background_data)
-                df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X).values)
-            except TypeError:
-                try:
-                    dtype_map = {spec.name: spec.as_dtype(force_numpy_dtype=True) for spec in signature.inputs}
-
-                    if isinstance(X, pd.DataFrame):
-                        X = X.astype(dtype_map, copy=False)
-                    if hasattr(raw_model, "predict_proba"):
-                        if isinstance(X, np.ndarray):
-                            explanations = shap.Explainer(
-                                raw_model.predict_proba, background_data.values  # type: ignore[union-attr]
-                            )(X).values
-                        else:
-                            explanations = shap.Explainer(raw_model.predict_proba, background_data)(X).values
-                    elif hasattr(raw_model, "predict"):
-                        if isinstance(X, np.ndarray):
-                            explanations = shap.Explainer(
-                                raw_model.predict, background_data.values  # type: ignore[union-attr]
-                            )(X).values
-                        else:
-                            explanations = shap.Explainer(raw_model.predict, background_data)(X).values
-                    else:
-                        raise ValueError("Missing any supported target method to explain.")
-                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explanations)
-                except TypeError as e:
-                    raise ValueError(f"Explanation for this model type not supported yet: {str(e)}")
-            return model_signature_utils.rename_pandas_df(df, signature.outputs)
+            fn = cls._build_explain_fn(raw_model, background_data, signature.inputs)
+            return model_signature_utils.rename_pandas_df(fn(X), signature.outputs)
 
         if target_method == "explain":
             return explain_fn
@@ -339,3 +364,37 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
         skl_model = _SKLModel(custom_model.ModelContext())
 
         return skl_model
+
+    @classmethod
+    def _build_explain_fn(
+        cls,
+        model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
+        background_data: model_types.SupportedDataType,
+        input_specs: Sequence[model_signature.BaseFeatureSpec],
+    ) -> Callable[[model_types.SupportedDataType], pd.DataFrame]:
+        import shap
+        import sklearn.pipeline
+
+        transformed_bg_data = _apply_transforms_up_to_last_step(model, background_data)
+
+        def explain_fn(data: model_types.SupportedDataType) -> pd.DataFrame:
+            transformed_data = _apply_transforms_up_to_last_step(model, data)
+            predictor = model[-1] if isinstance(model, sklearn.pipeline.Pipeline) else model
+            try:
+                explainer = shap.Explainer(predictor, transformed_bg_data)
+                return handlers_utils.convert_explanations_to_2D_df(model, explainer(transformed_data).values)
+            except TypeError:
+                if isinstance(data, pd.DataFrame):
+                    dtype_map = {spec.name: spec.as_dtype(force_numpy_dtype=True) for spec in input_specs}
+                    transformed_data = _apply_transforms_up_to_last_step(model, data.astype(dtype_map))
+                for explain_target_method in cls.EXPLAIN_TARGET_METHODS:
+                    if not hasattr(predictor, explain_target_method):
+                        continue
+                    explain_target_method_fn = getattr(predictor, explain_target_method)
+                    explanations = shap.Explainer(explain_target_method_fn, transformed_bg_data.values)(
+                        transformed_data.to_numpy()
+                    ).values
+                    return handlers_utils.convert_explanations_to_2D_df(model, explanations)
+                raise ValueError("Missing any supported target method to explain.")
+
+        return explain_fn
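
`_build_explain_fn` folds the old inline try/except into one place: first attempt the estimator-based `shap.Explainer`, and if SHAP raises `TypeError`, retry with a prediction-method wrapper picked from `EXPLAIN_TARGET_METHODS`. A rough standalone sketch of that fallback pattern (not the handler's exact code; assumes `shap` and scikit-learn are installed):

    import numpy as np
    import shap
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X_bg = rng.normal(size=(50, 3))
    model = LogisticRegression().fit(X_bg, (X_bg.sum(axis=1) > 0).astype(int))

    try:
        explainer = shap.Explainer(model, X_bg)                 # estimator-based
    except TypeError:
        explainer = shap.Explainer(model.predict_proba, X_bg)   # function-based fallback

    print(explainer(X_bg[:5]).values.shape)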

snowflake/ml/model/_packager/model_handlers/tensorflow.py

@@ -88,6 +88,7 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         import tensorflow
 
         assert isinstance(model, tensorflow.Module)
+        multiple_inputs = kwargs.get("multiple_inputs", False)
 
         is_keras_model = type_utils.LazyType("keras.Model").isinstance(model)
         is_tf_keras_model = type_utils.LazyType("tf_keras.Model").isinstance(model)
@@ -112,8 +113,6 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
             default_target_methods=default_target_methods,
         )
 
-        multiple_inputs = kwargs.get("multiple_inputs", False)
-
         if is_keras_model and len(target_methods) > 1:
             raise ValueError("Keras model can only have one target method.")
 
@@ -198,7 +197,6 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         model_blobs_dir_path: str,
         **kwargs: Unpack[model_types.TensorflowLoadOptions],
     ) -> "tensorflow.Module":
-        os.environ["TF_USE_LEGACY_KERAS"] = "1"
        import tensorflow
 
         model_blob_path = os.path.join(model_blobs_dir_path, name)
@@ -209,7 +207,12 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         load_path = os.path.join(model_blob_path, model_blob_filename)
         save_format = model_blob_options.get("save_format", "keras_tf")
         if save_format == "keras_tf":
-            m = tensorflow.keras.models.load_model(load_path)
+            if version.parse(tensorflow.keras.__version__) >= version.parse("3.0.0"):
+                import tf_keras
+
+                m = tf_keras.models.load_model(load_path)
+            else:
+                m = tensorflow.keras.models.load_model(load_path)
         else:
             m = tensorflow.saved_model.load(load_path)
 

snowflake/ml/model/_packager/model_handlers/torchscript.py

@@ -76,6 +76,8 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):
         if enable_explainability:
             raise NotImplementedError("Explainability is not supported for Torch Script model.")
 
+        multiple_inputs = kwargs.get("multiple_inputs", False)
+
         import torch
 
         assert isinstance(model, torch.jit.ScriptModule)
@@ -87,8 +89,6 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):
             default_target_methods=cls.DEFAULT_TARGET_METHODS,
         )
 
-        multiple_inputs = kwargs.get("multiple_inputs", False)
-
         def get_prediction(
             target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
         ) -> model_types.SupportedLocalDataType:

snowflake/ml/model/_packager/model_handlers/xgboost.py

@@ -144,7 +144,12 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
             path=cls.MODEL_BLOB_FILE_OR_DIR,
-            options=model_meta_schema.XgboostModelBlobOptions({"xgb_estimator_type": model.__class__.__name__}),
+            options=model_meta_schema.XgboostModelBlobOptions(
+                {
+                    "xgb_estimator_type": model.__class__.__name__,
+                    "enable_categorical": getattr(model, "enable_categorical", False),
+                }
+            ),
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION
@@ -152,11 +157,6 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
         model_meta.env.include_if_absent(
             [
                 model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn"),
-            ],
-            check_local_version=True,
-        )
-        model_meta.env.include_if_absent(
-            [
                 model_env.ModelDependency(requirement="xgboost", pip_name="xgboost"),
             ],
             check_local_version=True,
@@ -190,6 +190,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
             raise ValueError("Type of XGB estimator is illegal.")
         m = getattr(xgboost, xgb_estimator_type)()
         m.load_model(os.path.join(model_blob_path, model_blob_filename))
+        m.enable_categorical = model_blob_options.get("enable_categorical", False)
 
         if kwargs.get("use_gpu", False):
             assert type(kwargs.get("use_gpu", False)) == bool
@@ -225,8 +226,16 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.X
         ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
             @custom_model.inference_api
             def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                enable_categorical = False
+                for col, d_type in X.dtypes.items():
+                    if pd.api.extensions.ExtensionDtype.is_dtype(d_type):
+                        continue
+                    if not np.issubdtype(d_type, np.number):
+                        # categorical columns are converted to numpy's str dtype
+                        X[col] = X[col].astype("category")
+                        enable_categorical = True
                 if isinstance(raw_model, xgboost.Booster):
-                    X = xgboost.DMatrix(X)
+                    X = xgboost.DMatrix(X, enable_categorical=enable_categorical)
 
                 res = getattr(raw_model, target_method)(X)
 
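
The same dtype scan appears both here and in the DMatrix signature handler further below: non-numeric, non-extension columns are coerced to pandas `category` dtype and `enable_categorical` is forwarded to `xgboost.DMatrix`. A runnable sketch with illustrative data:

    import numpy as np
    import pandas as pd
    import xgboost as xgb

    df = pd.DataFrame({"num": [1.0, 2.0, 3.0], "color": ["red", "green", "red"]})

    enable_categorical = False
    for col, d_type in df.dtypes.items():
        if pd.api.extensions.ExtensionDtype.is_dtype(d_type):
            continue  # already a pandas extension dtype (e.g. category)
        if not np.issubdtype(d_type, np.number):
            df[col] = df[col].astype("category")
            enable_categorical = True

    dmat = xgb.DMatrix(df, enable_categorical=enable_categorical)
    print(dmat.feature_types)  # 'color' is treated as categorical ('c')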

snowflake/ml/model/_packager/model_meta/model_meta.py

@@ -65,7 +65,8 @@ def create_model_metadata(
         ext_modules: List of names of modules that need to be pickled with the model. Defaults to None.
         conda_dependencies: List of conda requirements for running the model. Defaults to None.
         pip_requirements: List of pip Python packages requirements for running the model. Defaults to None.
-        artifact_repository_map: A dict mapping from package channel to artifact repository name.
+        artifact_repository_map: A dict mapping from package channel to artifact repository name (e.g.
+            {'pip': 'snowflake.snowpark.pypi_shared_repository'}).
         resource_constraint: Mapping of resource constraint keys and values, e.g. {"architecture": "x86"}.
         target_platforms: List of target platforms to run the model.
         python_version: A string of python version where model is run. Used for user override. If specified as None,

snowflake/ml/model/_packager/model_meta/model_meta_schema.py

@@ -63,6 +63,7 @@ class MLFlowModelBlobOptions(BaseModelBlobOptions):
 
 class XgboostModelBlobOptions(BaseModelBlobOptions):
     xgb_estimator_type: Required[str]
+    enable_categorical: NotRequired[bool]
 
 
 class PyTorchModelBlobOptions(BaseModelBlobOptions):

snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py

@@ -6,7 +6,7 @@ REQUIREMENTS = [
     "aiohttp!=4.0.0a0, !=4.0.0a1",
     "anyio>=3.5.0,<5",
     "cachetools>=3.1.1,<6",
-    "cloudpickle>=2.0.0,<3",
+    "cloudpickle>=2.0.0",
     "cryptography",
     "fsspec>=2024.6.1,<2026",
     "importlib_resources>=6.1.1, <7",
@@ -21,12 +21,12 @@ REQUIREMENTS = [
     "requests",
     "retrying>=1.3.3,<2",
     "s3fs>=2024.6.1,<2026",
-    "scikit-learn>=1.4,<1.6",
+    "scikit-learn<1.6",
     "scipy>=1.9,<2",
-    "snowflake-connector-python>=3.12.0,<4",
+    "shap>=0.46.0,<1",
+    "snowflake-connector-python>=3.14.0,<4",
     "snowflake-snowpark-python>=1.17.0,<2,!=1.26.0",
     "snowflake.core>=1.0.2,<2",
     "sqlparse>=0.4,<1",
     "typing-extensions>=4.1.0,<5",
-    "xgboost>=1.7.3,<3",
 ]

snowflake/ml/model/_signatures/dmatrix_handler.py

@@ -81,8 +81,16 @@ class XGBoostDMatrixHandler(base_handler.BaseDataHandler["xgboost.DMatrix"]):
     ) -> "xgboost.DMatrix":
         import xgboost as xgb
 
+        enable_categorical = False
+        for col, d_type in df.dtypes.items():
+            if pd.api.extensions.ExtensionDtype.is_dtype(d_type):
+                continue
+            if not np.issubdtype(d_type, np.number):
+                df[col] = df[col].astype("category")
+                enable_categorical = True
+
         if not features:
-            return xgb.DMatrix(df)
+            return xgb.DMatrix(df, enable_categorical=enable_categorical)
         else:
             feature_names = []
             feature_types = []
@@ -95,4 +103,9 @@ class XGBoostDMatrixHandler(base_handler.BaseDataHandler["xgboost.DMatrix"]):
                 assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
                 feature_names.append(feature.name)
                 feature_types.append(feature._dtype._numpy_type)
-            return xgb.DMatrix(df, feature_names=feature_names, feature_types=feature_types)
+            return xgb.DMatrix(
+                df,
+                feature_names=feature_names,
+                feature_types=feature_types,
+                enable_categorical=enable_categorical,
+            )

snowflake/ml/model/custom_model.py

@@ -4,6 +4,7 @@ from typing import Any, Callable, Coroutine, Generator, Optional, Union
 
 import anyio
 import pandas as pd
+from typing_extensions import deprecated
 
 from snowflake.ml.model import type_hints as model_types
 
@@ -226,12 +227,12 @@ class CustomModel:
         else:
             raise TypeError("A non-method inference API function is not supported.")
 
-    def _get_partitioned_infer_methods(self) -> list[str]:
-        """Returns all methods in CLS with `partitioned_inference_api` as the outermost decorator."""
+    def _get_partitioned_methods(self) -> list[str]:
+        """Returns all methods in CLS with `partitioned_api` as the outermost decorator."""
         rv = []
         for cls_method_str in dir(self):
             cls_method = getattr(self, cls_method_str)
-            if getattr(cls_method, "_is_partitioned_inference_api", False):
+            if getattr(cls_method, "_is_partitioned_api", False):
                 if inspect.ismethod(cls_method):
                     rv.append(cls_method_str)
                 else:
@@ -282,9 +283,21 @@ def inference_api(
     return func
 
 
+def partitioned_api(
+    func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
+) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
+    func.__dict__["_is_inference_api"] = True
+    func.__dict__["_is_partitioned_api"] = True
+    return func
+
+
+@deprecated(
+    "snowflake.ml.custom_model.partitioned_inference_api is deprecated and will be removed in a future release."
+    " Use snowflake.ml.custom_model.partitioned_api instead."
+)
 def partitioned_inference_api(
     func: Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame],
 ) -> Callable[[model_types.CustomModelType, pd.DataFrame], pd.DataFrame]:
     func.__dict__["_is_inference_api"] = True
-    func.__dict__["_is_partitioned_inference_api"] = True
+    func.__dict__["_is_partitioned_api"] = True
     return func
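
New code should use the `partitioned_api` name; the old decorator keeps working but now carries a deprecation notice via `typing_extensions.deprecated`. A minimal sketch of a partitioned custom model (class and column names are illustrative):

    import pandas as pd
    from snowflake.ml.model import custom_model

    class MyPartitionedModel(custom_model.CustomModel):
        @custom_model.partitioned_api
        def predict(self, input: pd.DataFrame) -> pd.DataFrame:
            # Invoked once per partition of the input data.
            return pd.DataFrame({"mean_value": [input["value"].mean()]})

    m = MyPartitionedModel(custom_model.ModelContext())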

snowflake/ml/model/model_signature.py

@@ -71,9 +71,9 @@ def _truncate_data(
         warnings.warn(
             formatting.unwrap(
                 f"""
-                The sample input has {row_count} rows, thus a truncation happened before inferring signature.
-                This might cause inaccurate signature inference.
-                If that happens, consider specifying signature manually.
+                The sample input has {row_count} rows. Using the first 100 rows to define the inputs and outputs
+                of the model and the data types of each. Use `signatures` parameter to specify model inputs and
+                outputs manually if the automatic inference is not correct.
                 """
             ),
             category=UserWarning,
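
The reworded warning points users at the `signatures` argument when the 100-row truncation makes automatic inference unreliable. A hedged sketch of supplying a signature manually (assuming a fitted `model`, a sample frame `df`, and a `Registry` instance `reg`, all illustrative):

    from snowflake.ml.model import model_signature

    sig = model_signature.infer_signature(
        input_data=df.head(100),
        output_data=model.predict(df.head(100)),
    )
    # reg.log_model(model, model_name="my_model", version_name="v1",
    #               signatures={"predict": sig})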

snowflake/ml/modeling/calibration/calibrated_classifier_cv.py

@@ -11,7 +11,7 @@ import cloudpickle as cp
 import numpy as np
 import pandas as pd
 from numpy import typing as npt
-
+from packaging import version
 
 import numpy
 import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
 INFER_SIGNATURE_MAX_ROWS = 100
 
+SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
+# Modeling library estimators require a smaller sklearn version range.
+if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
+    raise Exception(
+        f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
+    )
+
+
 class CalibratedClassifierCV(BaseTransformer):
     r"""Probability calibration with isotonic regression or logistic regression
     For more details on this class, see [sklearn.calibration.CalibratedClassifierCV]

snowflake/ml/modeling/cluster/affinity_propagation.py

@@ -11,7 +11,7 @@ import cloudpickle as cp
 import numpy as np
 import pandas as pd
 from numpy import typing as npt
-
+from packaging import version
 
 import numpy
 import sklearn
@@ -60,6 +60,14 @@ DATAFRAME_TYPE = Union[DataFrame, pd.DataFrame]
 
 INFER_SIGNATURE_MAX_ROWS = 100
 
+SKLEARN_LOWER, SKLEARN_UPPER = ('1.4', '1.6')
+# Modeling library estimators require a smaller sklearn version range.
+if not version.Version(SKLEARN_LOWER) <= version.Version(sklearn.__version__) < version.Version(SKLEARN_UPPER):
+    raise Exception(
+        f"To use the modeling library, install scikit-learn version >= {SKLEARN_LOWER} and < {SKLEARN_UPPER}"
+    )
+
+
 class AffinityPropagation(BaseTransformer):
     r"""Perform Affinity Propagation Clustering of data
     For more details on this class, see [sklearn.cluster.AffinityPropagation]