snowflake-ml-python 1.8.4__py3-none-any.whl → 1.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. snowflake/ml/_internal/telemetry.py +42 -16
  2. snowflake/ml/_internal/utils/connection_params.py +196 -0
  3. snowflake/ml/data/data_connector.py +1 -1
  4. snowflake/ml/jobs/__init__.py +2 -0
  5. snowflake/ml/jobs/_utils/constants.py +12 -2
  6. snowflake/ml/jobs/_utils/function_payload_utils.py +43 -0
  7. snowflake/ml/jobs/_utils/interop_utils.py +1 -1
  8. snowflake/ml/jobs/_utils/payload_utils.py +95 -39
  9. snowflake/ml/jobs/_utils/scripts/constants.py +22 -0
  10. snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +67 -2
  11. snowflake/ml/jobs/_utils/spec_utils.py +30 -6
  12. snowflake/ml/jobs/_utils/stage_utils.py +119 -0
  13. snowflake/ml/jobs/_utils/types.py +5 -1
  14. snowflake/ml/jobs/decorators.py +10 -7
  15. snowflake/ml/jobs/job.py +176 -28
  16. snowflake/ml/jobs/manager.py +119 -26
  17. snowflake/ml/model/_client/model/model_impl.py +58 -0
  18. snowflake/ml/model/_client/model/model_version_impl.py +90 -0
  19. snowflake/ml/model/_client/ops/model_ops.py +6 -3
  20. snowflake/ml/model/_client/ops/service_ops.py +24 -7
  21. snowflake/ml/model/_client/service/model_deployment_spec.py +11 -0
  22. snowflake/ml/model/_client/sql/model_version.py +1 -1
  23. snowflake/ml/model/_client/sql/service.py +73 -28
  24. snowflake/ml/model/_client/sql/stage.py +5 -2
  25. snowflake/ml/model/_model_composer/model_composer.py +3 -1
  26. snowflake/ml/model/_packager/model_handlers/sklearn.py +1 -1
  27. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +103 -73
  28. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +3 -2
  29. snowflake/ml/model/_signatures/core.py +24 -0
  30. snowflake/ml/monitoring/explain_visualize.py +160 -22
  31. snowflake/ml/monitoring/model_monitor.py +0 -4
  32. snowflake/ml/registry/registry.py +34 -14
  33. snowflake/ml/utils/connection_params.py +9 -3
  34. snowflake/ml/utils/html_utils.py +263 -0
  35. snowflake/ml/version.py +1 -1
  36. {snowflake_ml_python-1.8.4.dist-info → snowflake_ml_python-1.8.6.dist-info}/METADATA +40 -13
  37. {snowflake_ml_python-1.8.4.dist-info → snowflake_ml_python-1.8.6.dist-info}/RECORD +40 -37
  38. {snowflake_ml_python-1.8.4.dist-info → snowflake_ml_python-1.8.6.dist-info}/WHEEL +1 -1
  39. snowflake/ml/monitoring/model_monitor_version.py +0 -1
  40. {snowflake_ml_python-1.8.4.dist-info → snowflake_ml_python-1.8.6.dist-info}/licenses/LICENSE.txt +0 -0
  41. {snowflake_ml_python-1.8.4.dist-info → snowflake_ml_python-1.8.6.dist-info}/top_level.txt +0 -0
@@ -38,6 +38,96 @@ class ModelVersion(lineage_node.LineageNode):
     def __init__(self) -> None:
         raise RuntimeError("ModelVersion's initializer is not meant to be used. Use `version` from model instead.")
 
+    def _repr_html_(self) -> str:
+        """Generate an HTML representation of the model version.
+
+        Returns:
+            str: HTML string containing formatted model version details.
+        """
+        from snowflake.ml.utils import html_utils
+
+        # Get task
+        try:
+            task = self.get_model_task().value
+        except Exception:
+            task = (
+                html_utils.create_error_message("Not available")
+                .replace('<em style="color: #888; font-style: italic;">', "")
+                .replace("</em>", "")
+            )
+
+        # Get functions info for display
+        try:
+            functions = self.show_functions()
+            if not functions:
+                functions_html = html_utils.create_error_message("No functions available")
+            else:
+                functions_list = []
+                for func in functions:
+                    try:
+                        sig_html = func["signature"]._repr_html_()
+                    except Exception:
+                        # Fallback to simple display if can't display signature
+                        sig_html = f"<pre style='margin: 5px 0;'>{func['signature']}</pre>"
+
+                    function_content = f"""
+                    <div style="margin: 5px 0;">
+                        <strong>Target Method:</strong> {func['target_method']}
+                    </div>
+                    <div style="margin: 5px 0;">
+                        <strong>Function Type:</strong> {func.get('target_method_function_type', 'N/A')}
+                    </div>
+                    <div style="margin: 5px 0;">
+                        <strong>Partitioned:</strong> {func.get('is_partitioned', False)}
+                    </div>
+                    <div style="margin: 10px 0;">
+                        <strong>Signature:</strong>
+                        {sig_html}
+                    </div>
+                    """
+
+                    functions_list.append(
+                        html_utils.create_collapsible_section(
+                            title=func["name"], content=function_content, open_by_default=False
+                        )
+                    )
+                functions_html = "".join(functions_list)
+        except Exception:
+            functions_html = html_utils.create_error_message("Error retrieving functions")
+
+        # Get metrics for display
+        try:
+            metrics = self.show_metrics()
+            if not metrics:
+                metrics_html = html_utils.create_error_message("No metrics available")
+            else:
+                metrics_html = ""
+                for metric_name, value in metrics.items():
+                    metrics_html += html_utils.create_metric_item(metric_name, value)
+        except Exception:
+            metrics_html = html_utils.create_error_message("Error retrieving metrics")
+
+        # Create main content sections
+        main_info = html_utils.create_grid_section(
+            [
+                ("Model Name", self.model_name),
+                ("Version", f'<strong style="color: #28a745;">{self.version_name}</strong>'),
+                ("Full Name", self.fully_qualified_model_name),
+                ("Description", self.description),
+                ("Task", task),
+            ]
+        )
+
+        functions_section = html_utils.create_section_header("Functions") + html_utils.create_content_section(
+            functions_html
+        )
+
+        metrics_section = html_utils.create_section_header("Metrics") + html_utils.create_content_section(metrics_html)
+
+        content = main_info + functions_section + metrics_section
+
+        return html_utils.create_base_container("Model Version Details", content)
+
     @classmethod
     def _ref(
         cls,
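
Note: `_repr_html_` is the standard rich-display hook that IPython/Jupyter calls when an object is the last expression in a cell, so the method added above makes a `ModelVersion` render as a formatted HTML card in notebooks without any caller changes. A minimal sketch of the same pattern (the `Card` class and markup are illustrative, not part of snowflake-ml-python):

    class Card:
        """Toy object that renders as rich HTML in Jupyter via the _repr_html_ hook."""

        def __init__(self, title: str, body: str) -> None:
            self.title = title
            self.body = body

        def _repr_html_(self) -> str:
            # Jupyter calls this automatically; plain consoles fall back to __repr__.
            return f"<div><strong>{self.title}</strong><p>{self.body}</p></div>"

    # In a notebook cell, evaluating Card("Model Version Details", "...") shows the HTML.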
@@ -643,14 +643,17 @@ class ModelOperator:
         # TODO(sdas): Figure out a better way to filter out MODEL_BUILD_ services server side.
         fully_qualified_service_names = [str(service) for service in json_array if "MODEL_BUILD_" not in service]
 
-        result = []
-
+        result: list[ServiceInfo] = []
         for fully_qualified_service_name in fully_qualified_service_names:
             ingress_url: Optional[str] = None
             db, schema, service_name = sql_identifier.parse_fully_qualified_name(fully_qualified_service_name)
-            service_status, _ = self._service_client.get_service_status(
+            statuses = self._service_client.get_service_container_statuses(
                 database_name=db, schema_name=schema, service_name=service_name, statement_params=statement_params
             )
+            if len(statuses) == 0:
+                return result
+
+            service_status = statuses[0].service_status
             for res_row in self._service_client.show_endpoints(
                 database_name=db, schema_name=schema, service_name=service_name, statement_params=statement_params
             ):
@@ -125,6 +125,7 @@ class ServiceOperator:
             stage_path = self._create_temp_stage(database_name, schema_name, statement_params)
         else:
             stage_path = None
+        self._model_deployment_spec.clear()
         self._model_deployment_spec.add_model_spec(
             database_name=database_name,
             schema_name=schema_name,
@@ -168,7 +169,7 @@ class ServiceOperator:
                 schema_name=service_schema_name,
                 service_name=service_name,
                 service_status_list_if_exists=[
-                    service_sql.ServiceStatus.READY,
+                    service_sql.ServiceStatus.RUNNING,
                     service_sql.ServiceStatus.SUSPENDING,
                     service_sql.ServiceStatus.SUSPENDED,
                 ],
@@ -324,14 +325,15 @@ class ServiceOperator:
                 )
                 continue
 
-            service_status, message = self._service_client.get_service_status(
+            statuses = self._service_client.get_service_container_statuses(
                 database_name=service_log_meta.service.database_name,
                 schema_name=service_log_meta.service.schema_name,
                 service_name=service_log_meta.service.service_name,
                 include_message=True,
                 statement_params=statement_params,
             )
-            if (service_status != service_sql.ServiceStatus.READY) or (
+            service_status = statuses[0].service_status
+            if (service_status != service_sql.ServiceStatus.RUNNING) or (
                 service_status != service_log_meta.service_status
             ):
                 service_log_meta.service_status = service_status
@@ -340,7 +342,19 @@ class ServiceOperator:
                     f"{service_log_meta.service.display_service_name} is "
                     f"{service_log_meta.service_status.value}."
                 )
-                module_logger.info(f"Service message: {message}")
+                for status in statuses:
+                    if status.instance_id is not None:
+                        instance_status, container_status = None, None
+                        if status.instance_status is not None:
+                            instance_status = status.instance_status.value
+                        if status.container_status is not None:
+                            container_status = status.container_status.value
+                        module_logger.info(
+                            f"Instance[{status.instance_id}]: "
+                            f"instance status: {instance_status}, "
+                            f"container status: {container_status}, "
+                            f"message: {status.message}"
+                        )
 
             new_logs, new_offset = fetch_logs(
                 service_log_meta.service,
@@ -352,13 +366,14 @@ class ServiceOperator:
 
             # check if model build service is done
             if not service_log_meta.is_model_build_service_done:
-                service_status, _ = self._service_client.get_service_status(
+                statuses = self._service_client.get_service_container_statuses(
                     database_name=model_build_service.database_name,
                     schema_name=model_build_service.schema_name,
                     service_name=model_build_service.service_name,
                     include_message=False,
                     statement_params=statement_params,
                 )
+                service_status = statuses[0].service_status
 
                 if service_status == service_sql.ServiceStatus.DONE:
                     set_service_log_metadata_to_model_inference(
@@ -428,20 +443,21 @@ class ServiceOperator:
         if service_status_list_if_exists is None:
             service_status_list_if_exists = [
                 service_sql.ServiceStatus.PENDING,
-                service_sql.ServiceStatus.READY,
+                service_sql.ServiceStatus.RUNNING,
                 service_sql.ServiceStatus.SUSPENDING,
                 service_sql.ServiceStatus.SUSPENDED,
                 service_sql.ServiceStatus.DONE,
                 service_sql.ServiceStatus.FAILED,
             ]
         try:
-            service_status, _ = self._service_client.get_service_status(
+            statuses = self._service_client.get_service_container_statuses(
                 database_name=database_name,
                 schema_name=schema_name,
                 service_name=service_name,
                 include_message=False,
                 statement_params=statement_params,
             )
+            service_status = statuses[0].service_status
             return any(service_status == status for status in service_status_list_if_exists)
         except exceptions.SnowparkSQLException:
             return False
@@ -538,6 +554,7 @@ class ServiceOperator:
         )
 
         try:
+            self._model_deployment_spec.clear()
             # save the spec
             self._model_deployment_spec.add_model_spec(
                 database_name=database_name,
@@ -29,6 +29,17 @@ class ModelDeploymentSpec:
         self.database: Optional[sql_identifier.SqlIdentifier] = None
         self.schema: Optional[sql_identifier.SqlIdentifier] = None
 
+    def clear(self) -> None:
+        """Reset the deployment spec to its initial state."""
+        self._models = []
+        self._image_build = None
+        self._service = None
+        self._job = None
+        self._model_loggings = None
+        self._inference_spec = {}
+        self.database = None
+        self.schema = None
+
     def add_model_spec(
         self,
         database_name: sql_identifier.SqlIdentifier,
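
The `clear()` method added above pairs with the new `self._model_deployment_spec.clear()` calls in service_ops.py: the spec object is a reusable accumulator, so state from one deployment must be reset before the next. A minimal sketch of the reuse hazard being fixed (hypothetical `SpecBuilder`, not the library's class):

    class SpecBuilder:
        """Toy accumulating builder; a reused instance must be reset between runs."""

        def __init__(self) -> None:
            self.models: list[str] = []

        def clear(self) -> None:
            self.models = []

        def add_model(self, name: str) -> None:
            self.models.append(name)

    builder = SpecBuilder()
    builder.add_model("m1")
    builder.clear()          # without this, the next deployment would still carry "m1"
    builder.add_model("m2")
    assert builder.models == ["m2"]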
@@ -293,7 +293,7 @@ class ModelVersionSQLClient(_base._BaseSQLClient):
         if snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
             options = {"parallel": 10}
             cursor = self._session._conn._cursor
-            cursor._download(stage_location_url, str(target_path), options)  # type: ignore[union-attr]
+            cursor._download(stage_location_url, str(target_path), options)
             cursor.fetchall()
         else:
             query_result_checker.SqlResultValidator(
@@ -1,5 +1,6 @@
+import dataclasses
 import enum
-import json
+import logging
 import textwrap
 from typing import Any, Optional, Union
 
@@ -14,23 +15,59 @@ from snowflake.ml.model._model_composer.model_method import constants
 from snowflake.snowpark import dataframe, functions as F, row, types as spt
 from snowflake.snowpark._internal import utils as snowpark_utils
 
+logger = logging.getLogger(__name__)
+
 
 class ServiceStatus(enum.Enum):
-    UNKNOWN = "UNKNOWN"  # status is unknown because we have not received enough data from K8s yet.
-    PENDING = "PENDING"  # resource set is being created, can't be used yet
-    READY = "READY"  # resource set has been deployed.
-    SUSPENDING = "SUSPENDING"  # the service is set to suspended but the resource set is still in deleting state
-    SUSPENDED = "SUSPENDED"  # the service is suspended and the resource set is deleted
-    DELETING = "DELETING"  # resource set is being deleted
-    FAILED = "FAILED"  # resource set has failed and cannot be used anymore
-    DONE = "DONE"  # resource set has finished running
-    NOT_FOUND = "NOT_FOUND"  # not found or deleted
-    INTERNAL_ERROR = "INTERNAL_ERROR"  # there was an internal service error.
+    PENDING = "PENDING"
+    RUNNING = "RUNNING"
+    FAILED = "FAILED"
+    DONE = "DONE"
+    SUSPENDING = "SUSPENDING"
+    SUSPENDED = "SUSPENDED"
+    DELETING = "DELETING"
+    DELETED = "DELETED"
+    INTERNAL_ERROR = "INTERNAL_ERROR"
+
+
+class InstanceStatus(enum.Enum):
+    PENDING = "PENDING"
+    READY = "READY"
+    FAILED = "FAILED"
+    TERMINATING = "TERMINATING"
+    SUCCEEDED = "SUCCEEDED"
+
+
+class ContainerStatus(enum.Enum):
+    PENDING = "PENDING"
+    READY = "READY"
+    DONE = "DONE"
+    FAILED = "FAILED"
+    UNKNOWN = "UNKNOWN"
+
+
+@dataclasses.dataclass
+class ServiceStatusInfo:
+    """
+    Class containing information about service container status.
+    Reference: https://docs.snowflake.com/en/sql-reference/sql/show-service-containers-in-service
+    """
+
+    service_status: ServiceStatus
+    instance_id: Optional[int] = None
+    instance_status: Optional[InstanceStatus] = None
+    container_status: Optional[ContainerStatus] = None
+    message: Optional[str] = None
 
 
 class ServiceSQLClient(_base._BaseSQLClient):
     MODEL_INFERENCE_SERVICE_ENDPOINT_NAME_COL_NAME = "name"
     MODEL_INFERENCE_SERVICE_ENDPOINT_INGRESS_URL_COL_NAME = "ingress_url"
+    SERVICE_STATUS = "service_status"
+    INSTANCE_ID = "instance_id"
+    INSTANCE_STATUS = "instance_status"
+    CONTAINER_STATUS = "status"
+    MESSAGE = "message"
 
     def build_model_container(
         self,
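
Each row of SHOW SERVICE CONTAINERS IN SERVICE describes one container of one service instance, so a single service maps to a list of `ServiceStatusInfo` records with the service-level status repeated on every row. A hedged sketch of how one row translates, using the enums and column constants defined above (field values are illustrative):

    # A SHOW SERVICE CONTAINERS IN SERVICE row, reduced to the columns read here.
    row = {
        "service_status": "RUNNING",
        "instance_id": 0,
        "instance_status": "READY",
        "status": "READY",       # the container-status column is named plain "status"
        "message": "Running",
    }

    info = ServiceStatusInfo(
        service_status=ServiceStatus(row["service_status"]),
        instance_id=row["instance_id"],
        instance_status=InstanceStatus(row["instance_status"]),
        container_status=ContainerStatus(row["status"]),
        message=row["message"],
    )
    assert info.service_status is ServiceStatus.RUNNING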
@@ -79,6 +116,10 @@ class ServiceSQLClient(_base._BaseSQLClient):
     ) -> tuple[str, snowpark.AsyncJob]:
         assert model_deployment_spec_yaml_str or model_deployment_spec_file_rel_path
         if model_deployment_spec_yaml_str:
+            model_deployment_spec_yaml_str = snowpark_utils.escape_single_quotes(
+                model_deployment_spec_yaml_str
+            )  # type: ignore[no-untyped-call]
+            logger.info(f"Deploying model with spec={model_deployment_spec_yaml_str}")
             sql_str = f"CALL SYSTEM$DEPLOY_MODEL('{model_deployment_spec_yaml_str}')"
         else:
             sql_str = f"CALL SYSTEM$DEPLOY_MODEL('@{stage_path}/{model_deployment_spec_file_rel_path}')"
@@ -190,7 +231,7 @@ class ServiceSQLClient(_base._BaseSQLClient):
         )
         return str(rows[0][system_func])
 
-    def get_service_status(
+    def get_service_container_statuses(
         self,
         *,
         database_name: Optional[sql_identifier.SqlIdentifier],
@@ -198,23 +239,27 @@ class ServiceSQLClient(_base._BaseSQLClient):
         schema_name: Optional[sql_identifier.SqlIdentifier],
         service_name: sql_identifier.SqlIdentifier,
         include_message: bool = False,
         statement_params: Optional[dict[str, Any]] = None,
-    ) -> tuple[ServiceStatus, Optional[str]]:
-        system_func = "SYSTEM$GET_SERVICE_STATUS"
-        rows = (
-            query_result_checker.SqlResultValidator(
-                self._session,
-                f"CALL {system_func}('{self.fully_qualified_object_name(database_name, schema_name, service_name)}')",
-                statement_params=statement_params,
-            )
-            .has_dimensions(expected_rows=1, expected_cols=1)
-            .validate()
-        )
-        metadata = json.loads(rows[0][system_func])[0]
-        if metadata and metadata["status"]:
-            service_status = ServiceStatus(metadata["status"])
-            message = metadata["message"] if include_message else None
-            return service_status, message
-        return ServiceStatus.UNKNOWN, None
+    ) -> list[ServiceStatusInfo]:
+        fully_qualified_object_name = self.fully_qualified_object_name(database_name, schema_name, service_name)
+        query = f"SHOW SERVICE CONTAINERS IN SERVICE {fully_qualified_object_name}"
+        rows = self._session.sql(query).collect(statement_params=statement_params)
+        statuses = []
+        for r in rows:
+            instance_status, container_status = None, None
+            if r[ServiceSQLClient.INSTANCE_STATUS] is not None:
+                instance_status = InstanceStatus(r[ServiceSQLClient.INSTANCE_STATUS])
+            if r[ServiceSQLClient.CONTAINER_STATUS] is not None:
+                container_status = ContainerStatus(r[ServiceSQLClient.CONTAINER_STATUS])
+            statuses.append(
+                ServiceStatusInfo(
+                    service_status=ServiceStatus(r[ServiceSQLClient.SERVICE_STATUS]),
+                    instance_id=r[ServiceSQLClient.INSTANCE_ID],
+                    instance_status=instance_status,
+                    container_status=container_status,
+                    message=r[ServiceSQLClient.MESSAGE] if include_message else None,
+                )
+            )
+        return statuses
 
     def drop_service(
         self,
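
Callers that previously received a single `(status, message)` tuple now get one entry per container, with the service-level status duplicated on each row; that is why the call sites above read `statuses[0].service_status`. A hedged sketch of consuming the richer result (the client and identifier variables are assumed to be in scope):

    statuses = svc_client.get_service_container_statuses(
        database_name=db, schema_name=schema, service_name=service_name
    )
    overall = statuses[0].service_status if statuses else None
    failed = [s for s in statuses if s.container_status is ContainerStatus.FAILED]
    print(f"service={overall}, failed containers={len(failed)}")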
@@ -12,9 +12,12 @@ class StageSQLClient(_base._BaseSQLClient):
         schema_name: Optional[sql_identifier.SqlIdentifier],
         stage_name: sql_identifier.SqlIdentifier,
         statement_params: Optional[dict[str, Any]] = None,
-    ) -> None:
+    ) -> str:
+        fq_stage_name = self.fully_qualified_object_name(database_name, schema_name, stage_name)
         query_result_checker.SqlResultValidator(
             self._session,
-            f"CREATE SCOPED TEMPORARY STAGE {self.fully_qualified_object_name(database_name, schema_name, stage_name)}",
+            f"CREATE SCOPED TEMPORARY STAGE {fq_stage_name}",
             statement_params=statement_params,
         ).has_dimensions(expected_rows=1, expected_cols=1).validate()
+
+        return fq_stage_name
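
Returning the fully qualified name saves callers from re-deriving the identifier they just passed in. A sketch of the intended call pattern (the upload line is illustrative of Snowpark's file API, not taken from this diff):

    fq_stage = stage_client.create_tmp_stage(
        database_name=db, schema_name=schema, stage_name=stage_name
    )
    # The returned name can be used directly to address the stage:
    session.file.put("model_deployment_spec.yaml", f"@{fq_stage}", auto_compress=False)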
@@ -188,7 +188,9 @@ class ModelComposer:
         if not options:
             options = model_types.BaseModelSaveOption()
 
-        if not snowpark_utils.is_in_stored_procedure():  # type: ignore[no-untyped-call]
+        if not snowpark_utils.is_in_stored_procedure() and target_platforms != [  # type: ignore[no-untyped-call]
+            model_types.TargetPlatform.SNOWPARK_CONTAINER_SERVICES  # no information schema check for SPCS-only models
+        ]:
             snowml_matched_versions = env_utils.get_matched_package_versions_in_information_schema(
                 self.session,
                 reqs=[requirements.Requirement(f"{env_utils.SNOWPARK_ML_PKG_NAME}=={snowml_version.VERSION}")],
@@ -216,7 +216,7 @@ class SKLModelHandler(_base.BaseModelHandler[Union["sklearn.base.BaseEstimator",
                     explain_fn=cls._build_explain_fn(model, background_data, input_signature),
                     output_feature_names=transformed_background_data.columns,
                 )
-            except ValueError:
+            except Exception:
                 if kwargs.get("enable_explainability", None):
                     # user explicitly enabled explainability, so we should raise the error
                     raise ValueError(
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, cast, final
 import cloudpickle
 import numpy as np
 import pandas as pd
+import shap
 from typing_extensions import TypeGuard, Unpack
 
 from snowflake.ml._internal import type_utils
@@ -25,6 +26,19 @@ if TYPE_CHECKING:
     from snowflake.ml.modeling.framework.base import BaseEstimator
 
 
+def _apply_transforms_up_to_last_step(
+    model: "BaseEstimator",
+    data: model_types.SupportedDataType,
+) -> pd.DataFrame:
+    """Apply all transformations in the snowml pipeline model up to the last step."""
+    if type_utils.LazyType("snowflake.ml.modeling.pipeline.Pipeline").isinstance(model):
+        for step_name, step in model.steps[:-1]:  # type: ignore[attr-defined]
+            if not hasattr(step, "transform"):
+                raise ValueError(f"Step '{step_name}' does not have a 'transform' method.")
+            data = pd.DataFrame(step.transform(data))
+    return data
+
+
 @final
 class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
     """Handler for SnowML based model.
@@ -39,7 +53,7 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
     _HANDLER_MIGRATOR_PLANS: dict[str, type[base_migrator.BaseModelHandlerMigrator]] = {}
 
     DEFAULT_TARGET_METHODS = ["predict", "transform", "predict_proba", "predict_log_proba", "decision_function"]
-    EXPLAIN_TARGET_METHODS = ["predict", "predict_proba", "predict_log_proba"]
+    EXPLAIN_TARGET_METHODS = ["predict_proba", "predict", "predict_log_proba"]
 
     IS_AUTO_SIGNATURE = True
 
@@ -97,11 +111,6 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
                 return result
             except exceptions.SnowflakeMLException:
                 pass  # Do nothing and continue to the next method
-
-        if enable_explainability:
-            raise ValueError(
-                "Explain only supported for xgboost, lightgbm and sklearn (not pipeline) Snowpark ML models."
-            )
         return None
 
     @classmethod
@@ -189,23 +198,46 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         else:
             enable_explainability = True
         if enable_explainability:
-            model_task_and_output_type = model_task_utils.resolve_model_task_and_output_type(
-                python_base_obj, model_meta.task
-            )
-            model_meta.task = model_task_and_output_type.task
-            model_meta = handlers_utils.add_explain_method_signature(
-                model_meta=model_meta,
-                explain_method="explain",
-                target_method=explain_target_method,
-                output_return_type=model_task_and_output_type.output_type,
-            )
-            background_data = handlers_utils.get_explainability_supported_background(
-                sample_input_data, model_meta, explain_target_method
-            )
-            if background_data is not None:
-                handlers_utils.save_background_data(
-                    model_blobs_dir_path, cls.EXPLAIN_ARTIFACTS_DIR, cls.BG_DATA_FILE_SUFFIX, name, background_data
-                )
+            try:
+                model_task_and_output_type = model_task_utils.resolve_model_task_and_output_type(
+                    python_base_obj, model_meta.task
+                )
+                model_meta.task = model_task_and_output_type.task
+                background_data = handlers_utils.get_explainability_supported_background(
+                    sample_input_data, model_meta, explain_target_method
+                )
+                if type_utils.LazyType("snowflake.ml.modeling.pipeline.Pipeline").isinstance(model):
+                    transformed_df = _apply_transforms_up_to_last_step(model, sample_input_data)
+                    explain_fn = cls._build_explain_fn(model, background_data)
+                    model_meta = handlers_utils.add_inferred_explain_method_signature(
+                        model_meta=model_meta,
+                        explain_method="explain",
+                        target_method=explain_target_method,  # type: ignore[arg-type]
+                        background_data=background_data,
+                        explain_fn=explain_fn,
+                        output_feature_names=transformed_df.columns,
+                    )
+                else:
+                    model_meta = handlers_utils.add_explain_method_signature(
+                        model_meta=model_meta,
+                        explain_method="explain",
+                        target_method=explain_target_method,
+                        output_return_type=model_task_and_output_type.output_type,
+                    )
+                if background_data is not None:
+                    handlers_utils.save_background_data(
+                        model_blobs_dir_path,
+                        cls.EXPLAIN_ARTIFACTS_DIR,
+                        cls.BG_DATA_FILE_SUFFIX,
+                        name,
+                        background_data,
+                    )
+            except Exception:
+                if kwargs.get("enable_explainability", None):
+                    # user explicitly enabled explainability, so we should raise the error
+                    raise ValueError(
+                        "Explainability for this model is not supported. Please set `enable_explainability=False`"
+                    )
 
         model_blob_path = os.path.join(model_blobs_dir_path, name)
         os.makedirs(model_blob_path, exist_ok=True)
@@ -251,6 +283,53 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
         assert isinstance(m, BaseEstimator)
         return m
 
+    @classmethod
+    def _build_explain_fn(
+        cls, model: "BaseEstimator", background_data: model_types.SupportedDataType
+    ) -> Callable[[model_types.SupportedDataType], pd.DataFrame]:
+
+        predictor = model
+        is_pipeline = type_utils.LazyType("snowflake.ml.modeling.pipeline.Pipeline").isinstance(model)
+        if is_pipeline:
+            background_data = _apply_transforms_up_to_last_step(model, background_data)
+            predictor = model.steps[-1][1]  # type: ignore[attr-defined]
+
+        def explain_fn(data: model_types.SupportedDataType) -> pd.DataFrame:
+            data = _apply_transforms_up_to_last_step(model, data)
+            tree_methods = ["to_xgboost", "to_lightgbm"]
+            non_tree_methods = ["to_sklearn", None]  # None just uses the predictor directly
+            for method_name in tree_methods:
+                try:
+                    base_model = getattr(predictor, method_name)()
+                    explainer = shap.TreeExplainer(base_model)
+                    return handlers_utils.convert_explanations_to_2D_df(model, explainer.shap_values(data))
+                except exceptions.SnowflakeMLException:
+                    pass  # Do nothing and continue to the next method
+            for method_name in non_tree_methods:  # type: ignore[assignment]
+                try:
+                    base_model = getattr(predictor, method_name)() if method_name is not None else predictor
+                    try:
+                        explainer = shap.Explainer(base_model, masker=background_data)
+                        return handlers_utils.convert_explanations_to_2D_df(base_model, explainer(data).values)
+                    except TypeError:
+                        for explain_target_method in cls.EXPLAIN_TARGET_METHODS:
+                            if not hasattr(base_model, explain_target_method):
+                                continue
+                            explain_target_method_fn = getattr(base_model, explain_target_method)
+                            if isinstance(data, np.ndarray):
+                                explainer = shap.Explainer(
+                                    explain_target_method_fn,
+                                    background_data.values,  # type: ignore[union-attr]
+                                )
+                            else:
+                                explainer = shap.Explainer(explain_target_method_fn, background_data)
+                            return handlers_utils.convert_explanations_to_2D_df(base_model, explainer(data).values)
+                except Exception:
+                    pass  # Do nothing and continue to the next method
+            raise ValueError("Explainability for this model is not supported.")
+
+        return explain_fn
+
     @classmethod
     def convert_as_custom_model(
         cls,
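
The fallback chain in `_build_explain_fn` (shap.TreeExplainer for models convertible to xgboost/lightgbm, then a generic shap.Explainer over the model or one of its prediction methods) follows standard shap usage. A condensed sketch of the same cascade on a plain sklearn model (assumes shap is installed; not the handler's exact logic):

    import numpy as np
    import pandas as pd
    import shap
    from sklearn.ensemble import RandomForestClassifier

    X = pd.DataFrame(np.random.rand(50, 3), columns=["f0", "f1", "f2"])
    y = (X["f0"] > 0.5).astype(int)
    model = RandomForestClassifier(n_estimators=10).fit(X, y)

    try:
        explainer = shap.TreeExplainer(model)               # fast path for tree models
        values = explainer.shap_values(X)
    except Exception:
        explainer = shap.Explainer(model.predict_proba, X)  # generic fallback
        values = explainer(X).values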
@@ -286,57 +365,8 @@ class SnowMLModelHandler(_base.BaseModelHandler["BaseEstimator"]):
 
         @custom_model.inference_api
         def explain_fn(self: custom_model.CustomModel, X: pd.DataFrame) -> pd.DataFrame:
-            import shap
-
-            tree_methods = ["to_xgboost", "to_lightgbm"]
-            non_tree_methods = ["to_sklearn"]
-            for method_name in tree_methods:
-                try:
-                    base_model = getattr(raw_model, method_name)()
-                    explainer = shap.TreeExplainer(base_model)
-                    df = handlers_utils.convert_explanations_to_2D_df(raw_model, explainer.shap_values(X))
-                    return model_signature_utils.rename_pandas_df(df, signature.outputs)
-                except exceptions.SnowflakeMLException:
-                    pass  # Do nothing and continue to the next method
-            for method_name in non_tree_methods:
-                try:
-                    base_model = getattr(raw_model, method_name)()
-                    try:
-                        explainer = shap.Explainer(base_model, masker=background_data)
-                        df = handlers_utils.convert_explanations_to_2D_df(base_model, explainer(X).values)
-                    except TypeError:
-                        try:
-                            dtype_map = {
-                                spec.name: spec.as_dtype(force_numpy_dtype=True) for spec in signature.inputs
-                            }
-
-                            if isinstance(X, pd.DataFrame):
-                                X = X.astype(dtype_map, copy=False)
-                            if hasattr(base_model, "predict_proba"):
-                                if isinstance(X, np.ndarray):
-                                    explainer = shap.Explainer(
-                                        base_model.predict_proba,
-                                        background_data.values,  # type: ignore[union-attr]
-                                    )
-                                else:
-                                    explainer = shap.Explainer(base_model.predict_proba, background_data)
-                            elif hasattr(base_model, "predict"):
-                                if isinstance(X, np.ndarray):
-                                    explainer = shap.Explainer(
-                                        base_model.predict, background_data.values  # type: ignore[union-attr]
-                                    )
-                                else:
-                                    explainer = shap.Explainer(base_model.predict, background_data)
-                            else:
-                                raise ValueError("Missing any supported target method to explain.")
-                            df = handlers_utils.convert_explanations_to_2D_df(base_model, explainer(X).values)
-                        except TypeError as e:
-                            raise ValueError(f"Explanation for this model type not supported yet: {str(e)}")
-                    return model_signature_utils.rename_pandas_df(df, signature.outputs)
-
-                except exceptions.SnowflakeMLException:
-                    pass  # Do nothing and continue to the next method
-            raise ValueError("The model must be an xgboost, lightgbm or sklearn (not pipeline) estimator.")
+            fn = cls._build_explain_fn(raw_model, background_data)
+            return model_signature_utils.rename_pandas_df(fn(X), signature.outputs)
 
         if target_method == "explain":
             return explain_fn