snowflake-ml-python 1.8.3__py3-none-any.whl → 1.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +7 -1
- snowflake/ml/_internal/platform_capabilities.py +13 -11
- snowflake/ml/_internal/telemetry.py +42 -13
- snowflake/ml/_internal/utils/identifier.py +2 -2
- snowflake/ml/data/data_connector.py +1 -1
- snowflake/ml/jobs/_utils/constants.py +10 -1
- snowflake/ml/jobs/_utils/interop_utils.py +1 -1
- snowflake/ml/jobs/_utils/payload_utils.py +51 -34
- snowflake/ml/jobs/_utils/scripts/constants.py +6 -0
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +4 -4
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +86 -3
- snowflake/ml/jobs/_utils/spec_utils.py +8 -6
- snowflake/ml/jobs/decorators.py +13 -3
- snowflake/ml/jobs/job.py +206 -26
- snowflake/ml/jobs/manager.py +78 -34
- snowflake/ml/model/_client/model/model_version_impl.py +1 -1
- snowflake/ml/model/_client/ops/service_ops.py +31 -17
- snowflake/ml/model/_client/service/model_deployment_spec.py +351 -170
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +25 -0
- snowflake/ml/model/_client/sql/model_version.py +1 -1
- snowflake/ml/model/_client/sql/service.py +20 -32
- snowflake/ml/model/_model_composer/model_composer.py +44 -19
- snowflake/ml/model/_packager/model_handlers/_utils.py +32 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +1 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +100 -41
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +7 -4
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +16 -7
- snowflake/ml/model/_packager/model_meta/model_meta.py +2 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +1 -0
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +5 -4
- snowflake/ml/model/_signatures/dmatrix_handler.py +15 -2
- snowflake/ml/model/custom_model.py +17 -4
- snowflake/ml/model/model_signature.py +3 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -1
- snowflake/ml/modeling/cluster/birch.py +9 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -1
- snowflake/ml/modeling/cluster/dbscan.py +9 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -1
- snowflake/ml/modeling/cluster/k_means.py +9 -1
- snowflake/ml/modeling/cluster/mean_shift.py +9 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -1
- snowflake/ml/modeling/cluster/optics.py +9 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -1
- snowflake/ml/modeling/compose/column_transformer.py +9 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -1
- snowflake/ml/modeling/covariance/oas.py +9 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -1
- snowflake/ml/modeling/decomposition/pca.py +9 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -1
- snowflake/ml/modeling/impute/knn_imputer.py +9 -1
- snowflake/ml/modeling/impute/missing_indicator.py +9 -1
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/lars.py +9 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/perceptron.py +9 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ridge.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -1
- snowflake/ml/modeling/manifold/isomap.py +9 -1
- snowflake/ml/modeling/manifold/mds.py +9 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -1
- snowflake/ml/modeling/manifold/tsne.py +9 -1
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -1
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -1
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -1
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -1
- snowflake/ml/modeling/svm/linear_svc.py +9 -1
- snowflake/ml/modeling/svm/linear_svr.py +9 -1
- snowflake/ml/modeling/svm/nu_svc.py +9 -1
- snowflake/ml/modeling/svm/nu_svr.py +9 -1
- snowflake/ml/modeling/svm/svc.py +9 -1
- snowflake/ml/modeling/svm/svr.py +9 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -1
- snowflake/ml/monitoring/explain_visualize.py +424 -0
- snowflake/ml/registry/_manager/model_manager.py +23 -2
- snowflake/ml/registry/registry.py +10 -9
- snowflake/ml/utils/connection_params.py +8 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/METADATA +58 -8
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/RECORD +196 -195
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.8.3.dist-info → snowflake_ml_python-1.8.5.dist-info}/top_level.txt +0 -0
snowflake/ml/model/_client/sql/service.py

@@ -1,10 +1,8 @@
 import enum
-import json
 import textwrap
 from typing import Any, Optional, Union

 from snowflake import snowpark
-from snowflake.ml._internal import platform_capabilities
 from snowflake.ml._internal.utils import (
     identifier,
     query_result_checker,
@@ -16,22 +14,25 @@ from snowflake.snowpark import dataframe, functions as F, row, types as spt
 from snowflake.snowpark._internal import utils as snowpark_utils


+# The enum comes from https://docs.snowflake.com/en/sql-reference/sql/show-service-containers-in-service#output
+# except UNKNOWN
 class ServiceStatus(enum.Enum):
     UNKNOWN = "UNKNOWN"  # status is unknown because we have not received enough data from K8s yet.
     PENDING = "PENDING"  # resource set is being created, can't be used yet
-    READY = "READY"  # resource set has been deployed.
     SUSPENDING = "SUSPENDING"  # the service is set to suspended but the resource set is still in deleting state
     SUSPENDED = "SUSPENDED"  # the service is suspended and the resource set is deleted
     DELETING = "DELETING"  # resource set is being deleted
     FAILED = "FAILED"  # resource set has failed and cannot be used anymore
     DONE = "DONE"  # resource set has finished running
-    NOT_FOUND = "NOT_FOUND"  # not found or deleted
     INTERNAL_ERROR = "INTERNAL_ERROR"  # there was an internal service error.
+    RUNNING = "RUNNING"
+    DELETED = "DELETED"


 class ServiceSQLClient(_base._BaseSQLClient):
     MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME = "name"
     MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME = "ingress_url"
+    SERVICE_STATUS = "service_status"

     def build_model_container(
         self,
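The enum now mirrors the statuses documented for SHOW SERVICE CONTAINERS IN SERVICE: READY and NOT_FOUND are gone, RUNNING and DELETED are new. A minimal self-contained sketch of defensive parsing against such an enum — the `parse_service_status` helper and the abbreviated enum are illustrative, not part of the package:

```python
import enum

class ServiceStatus(enum.Enum):  # abbreviated copy of the enum above, for illustration
    UNKNOWN = "UNKNOWN"
    PENDING = "PENDING"
    RUNNING = "RUNNING"
    DELETED = "DELETED"

def parse_service_status(raw: str) -> ServiceStatus:
    """Coerce a raw status string from SQL output, falling back to UNKNOWN."""
    try:
        return ServiceStatus(raw)
    except ValueError:
        return ServiceStatus.UNKNOWN

assert parse_service_status("RUNNING") is ServiceStatus.RUNNING
assert parse_service_status("READY") is ServiceStatus.UNKNOWN  # READY no longer exists in 1.8.5
```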
@@ -133,18 +134,10 @@ class ServiceSQLClient(_base._BaseSQLClient):
             input_args_sql = ", ".join(f"'{arg}', {arg.identifier()}" for arg in input_args)
             args_sql = f"object_construct_keep_null({input_args_sql})"

-        if platform_capabilities.PlatformCapabilities.get_instance().is_nested_function_enabled():
-            fully_qualified_service_name = self.fully_qualified_object_name(
-                actual_database_name, actual_schema_name, service_name
-            )
-            fully_qualified_function_name = f"{fully_qualified_service_name}!{method_name.identifier()}"
-        else:
-            function_name = identifier.concat_names([service_name.identifier(), "_", method_name.identifier()])
-            fully_qualified_function_name = identifier.get_schema_level_object_identifier(
-                actual_database_name.identifier(),
-                actual_schema_name.identifier(),
-                function_name,
-            )
+        fully_qualified_service_name = self.fully_qualified_object_name(
+            actual_database_name, actual_schema_name, service_name
+        )
+        fully_qualified_function_name = f"{fully_qualified_service_name}!{method_name.identifier()}"

         sql = textwrap.dedent(
             f"""{with_sql}
@@ -208,22 +201,17 @@ class ServiceSQLClient(_base._BaseSQLClient):
         include_message: bool = False,
         statement_params: Optional[dict[str, Any]] = None,
     ) -> tuple[ServiceStatus, Optional[str]]:
-        system_func = "SYSTEM$GET_SERVICE_STATUS"
-        rows = (
-            query_result_checker.SqlResultValidator(
-                self._session,
-                f"CALL {system_func}('{self.fully_qualified_object_name(database_name, schema_name, service_name)}')",
-                statement_params=statement_params,
-            )
-            .has_dimensions(expected_rows=1, expected_cols=1)
-            .validate()
-        )
-        metadata = json.loads(rows[0][system_func])[0]
-        if metadata and metadata["status"]:
-            service_status = ServiceStatus(metadata["status"])
-            message = metadata["message"] if include_message else None
-            return service_status, message
-        return ServiceStatus.UNKNOWN, None
+        fully_qualified_object_name = self.fully_qualified_object_name(database_name, schema_name, service_name)
+        query = f"SHOW SERVICE CONTAINERS IN SERVICE {fully_qualified_object_name}"
+        rows = self._session.sql(query).collect(statement_params=statement_params)
+        if len(rows) == 0:
+            return ServiceStatus.UNKNOWN, None
+        row = rows[0]
+        service_status = row[ServiceSQLClient.SERVICE_STATUS]
+        message = row["message"] if include_message else None
+        if not isinstance(service_status, ServiceStatus):
+            return ServiceStatus.UNKNOWN, message
+        return ServiceStatus(service_status), message

     def drop_service(
         self,
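The rewritten status check replaces the old SYSTEM$GET_SERVICE_STATUS call (whose JSON payload required the now-removed `json` import) with a plain metadata query. A hedged sketch of the equivalent standalone Snowpark call, assuming an open `session` and a fully qualified service name:

```python
from typing import Optional

from snowflake.snowpark import Session

def first_container_status(session: Session, service_fqn: str) -> tuple[str, Optional[str]]:
    # SHOW SERVICE CONTAINERS IN SERVICE returns one row per container; the
    # "service_status" and "message" columns are what the client reads above.
    rows = session.sql(f"SHOW SERVICE CONTAINERS IN SERVICE {service_fqn}").collect()
    if not rows:
        return "UNKNOWN", None
    return rows[0]["service_status"], rows[0]["message"]
```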
snowflake/ml/model/_model_composer/model_composer.py

@@ -142,30 +142,55 @@ class ModelComposer:
         conda_dep_dict = env_utils.validate_conda_dependency_string_list(
             conda_dependencies if conda_dependencies else []
         )
-        is_warehouse_runnable = (
-            not conda_dep_dict
-            or all(
-                chan == env_utils.DEFAULT_CHANNEL_NAME or chan == env_utils.SNOWFLAKE_CONDA_CHANNEL_URL
-                for chan in conda_dep_dict
-            )
-        ) and (not pip_requirements)
-        disable_explainability = (
-            target_platforms and model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES in target_platforms
-        ) or (not is_warehouse_runnable)
-
-        if disable_explainability and options and options.get("enable_explainability", False):
-            warnings.warn(
-                ("The model can be deployed to Snowpark Container Services only if `enable_explainability=False`."),
-                category=UserWarning,
-                stacklevel=2,
+
+        enable_explainability = None
+
+        if options:
+            enable_explainability = options.get("enable_explainability", None)
+
+        # skip everything if user said False explicitly
+        if enable_explainability is None or enable_explainability is True:
+            is_warehouse_runnable = (
+                not conda_dep_dict
+                or all(
+                    chan == env_utils.DEFAULT_CHANNEL_NAME or chan == env_utils.SNOWFLAKE_CONDA_CHANNEL_URL
+                    for chan in conda_dep_dict
+                )
+            ) and (not pip_requirements)
+
+            only_spcs = (
+                target_platforms
+                and len(target_platforms) == 1
+                and model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES in target_platforms
             )
+            if only_spcs or (not is_warehouse_runnable):
+                # if only SPCS and user asked for explainability we fail
+                if enable_explainability is True:
+                    raise ValueError(
+                        "`enable_explainability` cannot be set to True when the model is not runnable in WH "
+                        "or the target platforms include SPCS."
+                    )
+                elif not options:  # explicitly set flag to false in these cases if not specified
+                    options = model_types.BaseModelSaveOption()
+                    options["enable_explainability"] = False
+            elif (
+                target_platforms
+                and len(target_platforms) > 1
+                and model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES in target_platforms
+            ):  # if both then only available for WH
+                if enable_explainability is True:
+                    warnings.warn(
+                        ("Explain function will only be available for model deployed to warehouse."),
+                        category=UserWarning,
+                        stacklevel=2,
+                    )

         if not options:
             options = model_types.BaseModelSaveOption()
-        if disable_explainability:
-            options["enable_explainability"] = False

-        if not snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
+        if not snowpark_utils.is_in_stored_procedure() and target_platforms != [  # type: ignore[no-untyped-call]
+            model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES  # no information schema check for SPCS-only models
+        ]:
             snowml_matched_versions = env_utils.get_matched_package_versions_in_information_schema(
                 self.session,
                 reqs=[requirements.Requirement(f"{env_utils.SNOWPARK_ML_PKG_NAME}=={snowml_version.VERSION}")],
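From the caller's side, the new gating means an explicit `enable_explainability=True` on a model that cannot run in a warehouse, or that targets only SPCS, now raises instead of being silently ignored. A hedged sketch against the public registry API; `session` and `model` are assumed to exist:

```python
from snowflake.ml.registry import Registry

reg = Registry(session=session)

# Raises ValueError in 1.8.5: explainability requires a warehouse-runnable model.
reg.log_model(
    model,
    model_name="my_model",
    version_name="v1",
    target_platforms=["SNOWPARK_CONTAINER_SERVICES"],
    options={"enable_explainability": True},
)
```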
snowflake/ml/model/_packager/model_handlers/_utils.py

@@ -109,6 +109,35 @@ def get_input_signature(
     return input_sig


+def add_inferred_explain_method_signature(
+    model_meta: model_meta.ModelMetadata,
+    explain_method: str,
+    target_method: str,
+    background_data: model_types.SupportedDataType,
+    explain_fn: Callable[[model_types.SupportedLocalDataType], model_types.SupportedLocalDataType],
+    output_feature_names: Optional[Sequence[str]] = None,
+) -> model_meta.ModelMetadata:
+    inputs = get_input_signature(model_meta, target_method)
+    if output_feature_names is None:  # If not provided, assume output feature names are the same as input feature names
+        output_feature_names = [spec.name for spec in inputs]
+
+    if model_meta.model_type == "snowml":
+        suffixed_output_names = [identifier.concat_names([name, "_explanation"]) for name in output_feature_names]
+    else:
+        suffixed_output_names = [f"{name}_explanation" for name in output_feature_names]
+
+    truncated_background_data = get_truncated_sample_data(background_data, 5)
+    sig = model_signature.infer_signature(
+        input_data=truncated_background_data,
+        output_data=explain_fn(truncated_background_data),
+        input_feature_names=[spec.name for spec in inputs],
+        output_feature_names=suffixed_output_names,
+    )
+
+    model_meta.signatures[explain_method] = sig
+    return model_meta
+
+
 def add_explain_method_signature(
     model_meta: model_meta.ModelMetadata,
     explain_method: str,

@@ -236,8 +265,9 @@ def validate_model_task(passed_model_task: model_types.Task, inferred_model_task: model_types.Task) -> model_types.Task:
 def get_explain_target_method(
     model_metadata: model_meta.ModelMetadata, target_methods_list: list[str]
 ) -> Optional[str]:
-    for method in target_methods_list:
-        if method in model_metadata.signatures.keys():
+    """Returns the first target method that is found in the model metadata signatures."""
+    for method in target_methods_list:
+        if method in model_metadata.signatures.keys():
             return method
     return None
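`add_inferred_explain_method_signature` derives the explain signature empirically: run the explain function on a five-row sample of the background data and suffix each output column with `_explanation`. The naming convention, as a self-contained sketch:

```python
import pandas as pd

def explanation_column_names(background: pd.DataFrame, explain_fn) -> list:
    sample = background.head(5)      # mirrors get_truncated_sample_data(background_data, 5)
    explained = explain_fn(sample)   # one explanation column per output feature
    return [f"{name}_explanation" for name in explained.columns]

bg = pd.DataFrame({"x1": [1.0, 2.0, 3.0], "x2": [4.0, 5.0, 6.0]})
print(explanation_column_names(bg, lambda df: df * 0.0))
# ['x1_explanation', 'x2_explanation']
```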
snowflake/ml/model/_packager/model_handlers/custom.py

@@ -72,7 +72,7 @@ class CustomModelHandler(_base.BaseModelHandler["custom_model.CustomModel"]):
             predictions_df = target_method(model, sample_input_data)
             return predictions_df

-        for func_name in model._get_partitioned_infer_methods():
+        for func_name in model._get_partitioned_methods():
             function_properties = model_meta.function_properties.get(func_name, {})
             function_properties[model_meta_schema.FunctionProperties.PARTITIONED.value] = True
             model_meta.function_properties[func_name] = function_properties
snowflake/ml/model/_packager/model_handlers/pytorch.py

@@ -82,6 +82,7 @@ class PyTorchHandler(_base.BaseModelHandler["torch.nn.Module"]):
         enable_explainability = kwargs.get("enable_explainability", False)
         if enable_explainability:
             raise NotImplementedError("Explainability is not supported for PyTorch model.")
+        multiple_inputs = kwargs.get("multiple_inputs", False)

         import torch

@@ -94,8 +95,6 @@ class PyTorchHandler(_base.BaseModelHandler["torch.nn.Module"]):
             default_target_methods=cls.DEFAULT_TARGET_METHODS,
         )

-        multiple_inputs = kwargs.get("multiple_inputs", False)
-
         def get_prediction(
             target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
         ) -> model_types.SupportedLocalDataType:
snowflake/ml/model/_packager/model_handlers/sklearn.py

@@ -1,6 +1,6 @@
 import os
 import warnings
-from typing import TYPE_CHECKING, Callable, Optional, Union, cast, final
+from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union, cast, final

 import cloudpickle
 import numpy as np
@@ -38,6 +38,35 @@ def _unpack_container_runtime_pipeline(model: "sklearn.pipeline.Pipeline") -> "sklearn.pipeline.Pipeline":
     return model


+def _apply_transforms_up_to_last_step(
+    model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
+    data: model_types.SupportedDataType,
+    input_feature_names: Optional[list[str]] = None,
+) -> pd.DataFrame:
+    """Apply all transformations in the sklearn pipeline model up to the last step."""
+    transformed_data = data
+    output_features_names = input_feature_names
+
+    if type_utils.LazyType("sklearn.pipeline.Pipeline").isinstance(model):
+        for step_name, step in model.steps[:-1]:  # type: ignore[attr-defined]
+            if not hasattr(step, "transform"):
+                raise ValueError(f"Step '{step_name}' does not have a 'transform' method.")
+            transformed_data = step.transform(transformed_data)
+            if output_features_names is None:
+                continue
+            elif hasattr(step, "get_feature_names_out"):
+                output_features_names = step.get_feature_names_out(output_features_names)
+            else:
+                raise ValueError(
+                    f"Step '{step_name}' in the pipeline does not have a 'get_feature_names_out' method. "
+                    "Feature names cannot be propagated."
+                )
+    if type_utils.LazyType("scipy.sparse.csr_matrix").isinstance(transformed_data):
+        # Convert to dense array if it's a sparse matrix
+        transformed_data = transformed_data.toarray()  # type: ignore[attr-defined]
+    return pd.DataFrame(transformed_data, columns=output_features_names)
+
+
 @final
 class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):
     """Handler for scikit-learn based model.
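The helper applies every pipeline step except the final estimator, so SHAP sees the features the estimator was actually fit on. The same mechanics on a plain scikit-learn pipeline, runnable as-is:

```python
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([("scale", StandardScaler()), ("clf", LogisticRegression())])
X = pd.DataFrame({"a": [0.0, 1.0, 2.0], "b": [1.0, 1.0, 3.0]})
pipe.fit(X, [0, 1, 1])

# Transform up to (but not including) the last step, propagating column
# names via get_feature_names_out, as _apply_transforms_up_to_last_step does.
data, names = X, list(X.columns)
for _, step in pipe.steps[:-1]:
    data = step.transform(data)
    names = list(step.get_feature_names_out(names))
print(pd.DataFrame(data, columns=names))
```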
@@ -58,7 +87,9 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):
         "decision_function",
         "score_samples",
     ]
-
+
+    # Prioritize predict_proba as it gives multi-class probabilities
+    EXPLAIN_TARGET_METHODS = ["predict_proba", "predict", "predict_log_proba"]

     @classmethod
     def can_handle(
@@ -160,17 +191,38 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):
                 stacklevel=1,
             )
             enable_explainability = False
-        elif model_meta.task == model_types.Task.UNKNOWN or explain_target_method is None:
+        elif model_meta.task == model_types.Task.UNKNOWN:
+            enable_explainability = False
+        elif explain_target_method is None:
             enable_explainability = False
         else:
             enable_explainability = True
         if enable_explainability:
-            explain_target_method = str(explain_target_method)  # mypy complains if we don't cast to str here
-            model_meta = handlers_utils.add_explain_method_signature(
-                model_meta=model_meta,
-                explain_method="explain",
-                target_method=explain_target_method,
+            explain_target_method = str(explain_target_method)  # mypy complains if we don't cast to str here
+
+            input_signature = handlers_utils.get_input_signature(model_meta, explain_target_method)
+            transformed_background_data = _apply_transforms_up_to_last_step(
+                model=model,
+                data=background_data,
+                input_feature_names=[spec.name for spec in input_signature],
             )
+
+            try:
+                model_meta = handlers_utils.add_inferred_explain_method_signature(
+                    model_meta=model_meta,
+                    explain_method="explain",
+                    target_method=explain_target_method,
+                    background_data=background_data,
+                    explain_fn=cls._build_explain_fn(model, background_data, input_signature),
+                    output_feature_names=transformed_background_data.columns,
+                )
+            except Exception:
+                if kwargs.get("enable_explainability", None):
+                    # user explicitly enabled explainability, so we should raise the error
+                    raise ValueError(
+                        "Explainability for this model is not supported. Please set `enable_explainability=False`"
+                    )
+
             handlers_utils.save_background_data(
                 model_blobs_dir_path,
                 cls.EXPLAIN_ARTIFACTS_DIR,
@@ -222,11 +274,13 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):
         )

         if enable_explainability:
-            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap", pip_name="shap")])
+            model_meta.env.include_if_absent([model_env.ModelDependency(requirement="shap>=0.46.0", pip_name="shap")])
             model_meta.explain_algorithm = model_meta_schema.ModelExplainAlgorithm.SHAP

         model_meta.env.include_if_absent(
-            [model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn")],
+            [
+                model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn"),
+            ],
             check_local_version=True,
         )

@@ -286,37 +340,8 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):

         @custom_model.inference_api
         def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
-            import shap
-
-            try:
-                explainer = shap.Explainer(raw_model, background_data)
-                df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer(X).values)
-            except TypeError:
-                try:
-                    dtype_map = {spec.name: spec.as_dtype(force_numpy_dtype=True) for spec in signature.inputs}
-
-                    if isinstance(X, pd.DataFrame):
-                        X = X.astype(dtype_map, copy=False)
-                    if hasattr(raw_model, "predict_proba"):
-                        if isinstance(X, np.ndarray):
-                            explanations = shap.Explainer(
-                                raw_model.predict_proba, background_data.values  # type: ignore[union-attr]
-                            )(X).values
-                        else:
-                            explanations = shap.Explainer(raw_model.predict_proba, background_data)(X).values
-                    elif hasattr(raw_model, "predict"):
-                        if isinstance(X, np.ndarray):
-                            explanations = shap.Explainer(
-                                raw_model.predict, background_data.values  # type: ignore[union-attr]
-                            )(X).values
-                        else:
-                            explanations = shap.Explainer(raw_model.predict, background_data)(X).values
-                    else:
-                        raise ValueError("Missing any supported target method to explain.")
-                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explanations)
-                except TypeError as e:
-                    raise ValueError(f"Explanation for this model type not supported yet: {str(e)}")
-            return model_signature_utils.rename_pandas_df(df, signature.outputs)
+            fn = cls._build_explain_fn(raw_model, background_data, signature.inputs)
+            return model_signature_utils.rename_pandas_df(fn(X), signature.outputs)

         if target_method == "explain":
             return explain_fn
@@ -339,3 +364,37 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"]]):
         skl_model = _SKLModel(custom_model.ModelContext())

         return skl_model
+
+    @classmethod
+    def _build_explain_fn(
+        cls,
+        model: Union["sklearn.base.BaseEstimator", "sklearn.pipeline.Pipeline"],
+        background_data: model_types.SupportedDataType,
+        input_specs: Sequence[model_signature.BaseFeatureSpec],
+    ) -> Callable[[model_types.SupportedDataType], pd.DataFrame]:
+        import shap
+        import sklearn.pipeline
+
+        transformed_bg_data = _apply_transforms_up_to_last_step(model, background_data)
+
+        def explain_fn(data: model_types.SupportedDataType) -> pd.DataFrame:
+            transformed_data = _apply_transforms_up_to_last_step(model, data)
+            predictor = model[-1] if isinstance(model, sklearn.pipeline.Pipeline) else model
+            try:
+                explainer = shap.Explainer(predictor, transformed_bg_data)
+                return handlers_utils.convert_explanations_to_2D_df(model, explainer(transformed_data).values)
+            except TypeError:
+                if isinstance(data, pd.DataFrame):
+                    dtype_map = {spec.name: spec.as_dtype(force_numpy_dtype=True) for spec in input_specs}
+                    transformed_data = _apply_transforms_up_to_last_step(model, data.astype(dtype_map))
+                for explain_target_method in cls.EXPLAIN_TARGET_METHODS:
+                    if not hasattr(predictor, explain_target_method):
+                        continue
+                    explain_target_method_fn = getattr(predictor, explain_target_method)
+                    explanations = shap.Explainer(explain_target_method_fn, transformed_bg_data.values)(
+                        transformed_data.to_numpy()
+                    ).values
+                    return handlers_utils.convert_explanations_to_2D_df(model, explanations)
+                raise ValueError("Missing any supported target method to explain.")
+
+        return explain_fn
snowflake/ml/model/_packager/model_handlers/tensorflow.py

@@ -88,6 +88,7 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         import tensorflow

         assert isinstance(model, tensorflow.Module)
+        multiple_inputs = kwargs.get("multiple_inputs", False)

         is_keras_model = type_utils.LazyType("keras.Model").isinstance(model)
         is_tf_keras_model = type_utils.LazyType("tf_keras.Model").isinstance(model)

@@ -112,8 +113,6 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
             default_target_methods=default_target_methods,
         )

-        multiple_inputs = kwargs.get("multiple_inputs", False)
-
         if is_keras_model and len(target_methods) > 1:
             raise ValueError("Keras model can only have one target method.")

@@ -198,7 +197,6 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         model_blobs_dir_path: str,
         **kwargs: Unpack[model_types.TensorflowLoadOptions],
     ) -> "tensorflow.Module":
-        os.environ["TF_USE_LEGACY_KERAS"] = "1"
         import tensorflow

         model_blob_path = os.path.join(model_blobs_dir_path, name)

@@ -209,7 +207,12 @@ class TensorFlowHandler(_base.BaseModelHandler["tensorflow.Module"]):
         load_path = os.path.join(model_blob_path, model_blob_filename)
         save_format = model_blob_options.get("save_format", "keras_tf")
         if save_format == "keras_tf":
-            m = tensorflow.keras.models.load_model(load_path)
+            if version.parse(tensorflow.keras.__version__) >= version.parse("3.0.0"):
+                import tf_keras
+
+                m = tf_keras.models.load_model(load_path)
+            else:
+                m = tensorflow.keras.models.load_model(load_path)
         else:
             m = tensorflow.saved_model.load(load_path)

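The load-path change accounts for Keras 3: instead of forcing the TF_USE_LEGACY_KERAS environment variable, models saved in the legacy `keras_tf` format are loaded through the `tf_keras` compatibility package whenever the bundled Keras is 3.x. The gate, isolated, assuming `tf_keras` is installed alongside TensorFlow:

```python
import tensorflow
from packaging import version

def load_keras_tf_model(load_path: str):
    # tf_keras is the maintained Keras 2 fork; it keeps the legacy
    # loaders working where Keras 3 cannot read the saved artifact.
    if version.parse(tensorflow.keras.__version__) >= version.parse("3.0.0"):
        import tf_keras
        return tf_keras.models.load_model(load_path)
    return tensorflow.keras.models.load_model(load_path)
```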
snowflake/ml/model/_packager/model_handlers/torchscript.py

@@ -76,6 +76,8 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):
         if enable_explainability:
             raise NotImplementedError("Explainability is not supported for Torch Script model.")

+        multiple_inputs = kwargs.get("multiple_inputs", False)
+
         import torch

         assert isinstance(model, torch.jit.ScriptModule)

@@ -87,8 +89,6 @@ class TorchScriptHandler(_base.BaseModelHandler["torch.jit.ScriptModule"]):
             default_target_methods=cls.DEFAULT_TARGET_METHODS,
         )

-        multiple_inputs = kwargs.get("multiple_inputs", False)
-
         def get_prediction(
             target_method_name: str, sample_input_data: "model_types.SupportedLocalDataType"
         ) -> model_types.SupportedLocalDataType:
snowflake/ml/model/_packager/model_handlers/xgboost.py

@@ -144,7 +144,12 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.XGBModel"]]):
             model_type=cls.HANDLER_TYPE,
             handler_version=cls.HANDLER_VERSION,
             path=cls.MODEL_BLOB_FILE_OR_DIR,
-            options=model_meta_schema.XgboostModelBlobOptions({"xgb_estimator_type": model.__class__.__name__}),
+            options=model_meta_schema.XgboostModelBlobOptions(
+                {
+                    "xgb_estimator_type": model.__class__.__name__,
+                    "enable_categorical": getattr(model, "enable_categorical", False),
+                }
+            ),
         )
         model_meta.models[name] = base_meta
         model_meta.min_snowpark_ml_version = cls._MIN_SNOWPARK_ML_VERSION

@@ -152,11 +157,6 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.XGBModel"]]):
         model_meta.env.include_if_absent(
             [
                 model_env.ModelDependency(requirement="scikit-learn", pip_name="scikit-learn"),
-            ],
-            check_local_version=True,
-        )
-        model_meta.env.include_if_absent(
-            [
                 model_env.ModelDependency(requirement="xgboost", pip_name="xgboost"),
             ],
             check_local_version=True,

@@ -190,6 +190,7 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.XGBModel"]]):
             raise ValueError("Type of XGB estimator is illegal.")
         m = getattr(xgboost, xgb_estimator_type)()
         m.load_model(os.path.join(model_blob_path, model_blob_filename))
+        m.enable_categorical = model_blob_options.get("enable_categorical", False)

         if kwargs.get("use_gpu", False):
             assert type(kwargs.get("use_gpu", False)) == bool

@@ -225,8 +226,16 @@ class XGBModelHandler(_base.BaseModelHandler[Union["xgboost.Booster", "xgboost.XGBModel"]]):
         ) -> Callable[[custom_model.CustomModel, pd.DataFrame], pd.DataFrame]:
             @custom_model.inference_api
             def fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
+                enable_categorical = False
+                for col, d_type in X.dtypes.items():
+                    if pd.api.extensions.ExtensionDtype.is_dtype(d_type):
+                        continue
+                    if not np.issubdtype(d_type, np.number):
+                        # categorical columns are converted to numpy's str dtype
+                        X[col] = X[col].astype("category")
+                        enable_categorical = True
                 if isinstance(raw_model, xgboost.Booster):
-                    X = xgboost.DMatrix(X)
+                    X = xgboost.DMatrix(X, enable_categorical=enable_categorical)

                 res = getattr(raw_model, target_method)(X)

snowflake/ml/model/_packager/model_meta/model_meta.py

@@ -65,7 +65,8 @@ def create_model_metadata(
         ext_modules: List of names of modules that need to be pickled with the model. Defaults to None.
         conda_dependencies: List of conda requirements for running the model. Defaults to None.
         pip_requirements: List of pip Python packages requirements for running the model. Defaults to None.
-        artifact_repository_map: A dict mapping from package channel to artifact repository name.
+        artifact_repository_map: A dict mapping from package channel to artifact repository name (e.g.
+            {'pip': 'snowflake.snowpark.pypi_shared_repository'}).
         resource_constraint: Mapping of resource constraint keys and values, e.g. {"architecture": "x86"}.
         target_platforms: List of target platforms to run the model.
         python_version: A string of python version where model is run. Used for user override. If specified as None,
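A usage sketch for the clarified parameter, reusing the repository name from the docstring example; `reg`, `model`, and the package name are assumptions for illustration:

```python
reg.log_model(
    model,
    model_name="my_model",
    version_name="v2",
    pip_requirements=["some-pypi-only-package"],  # illustrative package name
    artifact_repository_map={"pip": "snowflake.snowpark.pypi_shared_repository"},
)
```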
snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py

@@ -6,13 +6,13 @@ REQUIREMENTS = [
     "aiohttp!=4.0.0a0, !=4.0.0a1",
     "anyio>=3.5.0,<5",
     "cachetools>=3.1.1,<6",
-    "cloudpickle>=2.0.0…
+    "cloudpickle>=2.0.0",
     "cryptography",
     "fsspec>=2024.6.1,<2026",
     "importlib_resources>=6.1.1, <7",
     "numpy>=1.23,<2",
     "packaging>=20.9,<25",
-    "pandas>=1.…
+    "pandas>=2.1.4,<3",
     "pyarrow",
     "pydantic>=2.8.2, <3",
     "pyjwt>=2.0.0, <3",

@@ -21,9 +21,10 @@ REQUIREMENTS = [
     "requests",
     "retrying>=1.3.3,<2",
     "s3fs>=2024.6.1,<2026",
-    "scikit-learn…
+    "scikit-learn<1.6",
     "scipy>=1.9,<2",
-    "…
+    "shap>=0.46.0,<1",
+    "snowflake-connector-python>=3.15.0,<4",
     "snowflake-snowpark-python>=1.17.0,<2,!=1.26.0",
     "snowflake.core>=1.0.2,<2",
     "sqlparse>=0.4,<1",
snowflake/ml/model/_signatures/dmatrix_handler.py

@@ -81,8 +81,16 @@ class XGBoostDMatrixHandler(base_handler.BaseDataHandler["xgboost.DMatrix"]):
     ) -> "xgboost.DMatrix":
         import xgboost as xgb

+        enable_categorical = False
+        for col, d_type in df.dtypes.items():
+            if pd.api.extensions.ExtensionDtype.is_dtype(d_type):
+                continue
+            if not np.issubdtype(d_type, np.number):
+                df[col] = df[col].astype("category")
+                enable_categorical = True
+
         if not features:
-            return xgb.DMatrix(df)
+            return xgb.DMatrix(df, enable_categorical=enable_categorical)
         else:
             feature_names = []
             feature_types = []

@@ -95,4 +103,9 @@ class XGBoostDMatrixHandler(base_handler.BaseDataHandler["xgboost.DMatrix"]):
             assert isinstance(feature, core.FeatureSpec), "Invalid feature kind."
             feature_names.append(feature.name)
             feature_types.append(feature._dtype._numpy_type)
-        return xgb.DMatrix(df, feature_names=feature_names, feature_types=feature_types)
+        return xgb.DMatrix(
+            df,
+            feature_names=feature_names,
+            feature_types=feature_types,
+            enable_categorical=enable_categorical,
+        )
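Both the XGBoost handler and this DMatrix converter now run the same dtype scan: plain non-numeric columns become pandas categoricals and `enable_categorical` is forwarded to xgboost. Runnable in isolation:

```python
import numpy as np
import pandas as pd
import xgboost as xgb

df = pd.DataFrame({"num": [1.0, 2.0, 3.0], "color": ["red", "blue", "red"]})

enable_categorical = False
for col, d_type in df.dtypes.items():
    if pd.api.extensions.ExtensionDtype.is_dtype(d_type):
        continue  # already a pandas extension dtype (e.g. an existing category)
    if not np.issubdtype(d_type, np.number):
        df[col] = df[col].astype("category")  # non-numeric numpy dtypes become categorical
        enable_categorical = True

dmat = xgb.DMatrix(df, enable_categorical=enable_categorical)
print(dmat.feature_types)  # the categorical column is typed 'c'
```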