PyPI - snowflake-ml-python - Versions diffs - 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl - Mend

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (322) hide show

snowflake/ml/monitoring/_client/model_monitor_sql_client.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Mapping, Optional
+from typing import Any, Mapping, Optional
 from snowflake import snowpark
 from snowflake.ml._internal.utils import (
@@ -15,7 +15,7 @@ MODEL_JSON_MODEL_NAME_FIELD = "model_name"
 MODEL_JSON_VERSION_NAME_FIELD = "version_name"
-def _build_sql_list_from_columns(columns: List[sql_identifier.SqlIdentifier]) -> str:
+def _build_sql_list_from_columns(columns: list[sql_identifier.SqlIdentifier]) -> str:
     sql_list = ", ".join([f"'{column}'" for column in columns])
     return f"({sql_list})"
@@ -60,17 +60,17 @@ class ModelMonitorSQLClient:
         function_name: str,
         warehouse_name: sql_identifier.SqlIdentifier,
         timestamp_column: sql_identifier.SqlIdentifier,
-        id_columns: List[sql_identifier.SqlIdentifier],
-        prediction_score_columns: List[sql_identifier.SqlIdentifier],
-        prediction_class_columns: List[sql_identifier.SqlIdentifier],
-        actual_score_columns: List[sql_identifier.SqlIdentifier],
-        actual_class_columns: List[sql_identifier.SqlIdentifier],
+        id_columns: list[sql_identifier.SqlIdentifier],
+        prediction_score_columns: list[sql_identifier.SqlIdentifier],
+        prediction_class_columns: list[sql_identifier.SqlIdentifier],
+        actual_score_columns: list[sql_identifier.SqlIdentifier],
+        actual_class_columns: list[sql_identifier.SqlIdentifier],
         refresh_interval: str,
         aggregation_window: str,
         baseline_database: Optional[sql_identifier.SqlIdentifier] = None,
         baseline_schema: Optional[sql_identifier.SqlIdentifier] = None,
         baseline: Optional[sql_identifier.SqlIdentifier] = None,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         baseline_sql = ""
         if baseline:
@@ -103,7 +103,7 @@ class ModelMonitorSQLClient:
         database_name: Optional[sql_identifier.SqlIdentifier] = None,
         schema_name: Optional[sql_identifier.SqlIdentifier] = None,
         monitor_name: sql_identifier.SqlIdentifier,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         search_database_name = database_name or self._database_name
         search_schema_name = schema_name or self._schema_name
@@ -116,8 +116,8 @@ class ModelMonitorSQLClient:
     def show_model_monitors(
         self,
         *,
-        statement_params: Optional[Dict[str, Any]] = None,
-    ) -> List[snowpark.Row]:
+        statement_params: Optional[dict[str, Any]] = None,
+    ) -> list[snowpark.Row]:
         fully_qualified_schema_name = ".".join([self._database_name.identifier(), self._schema_name.identifier()])
         return (
             query_result_checker.SqlResultValidator(
@@ -135,7 +135,7 @@ class ModelMonitorSQLClient:
         database_name: Optional[sql_identifier.SqlIdentifier] = None,
         schema_name: Optional[sql_identifier.SqlIdentifier] = None,
         monitor_name: sql_identifier.SqlIdentifier,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> bool:
         search_database_name = database_name or self._database_name
         search_schema_name = schema_name or self._schema_name
@@ -153,7 +153,7 @@ class ModelMonitorSQLClient:
     def validate_monitor_warehouse(
         self,
         warehouse_name: sql_identifier.SqlIdentifier,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         """Validate warehouse provided for monitoring exists.
@@ -177,11 +177,11 @@ class ModelMonitorSQLClient:
         *,
         source_column_schema: Mapping[str, types.DataType],
         timestamp_column: sql_identifier.SqlIdentifier,
-        prediction_score_columns: List[sql_identifier.SqlIdentifier],
-        prediction_class_columns: List[sql_identifier.SqlIdentifier],
-        actual_score_columns: List[sql_identifier.SqlIdentifier],
-        actual_class_columns: List[sql_identifier.SqlIdentifier],
-        id_columns: List[sql_identifier.SqlIdentifier],
+        prediction_score_columns: list[sql_identifier.SqlIdentifier],
+        prediction_class_columns: list[sql_identifier.SqlIdentifier],
+        actual_score_columns: list[sql_identifier.SqlIdentifier],
+        actual_class_columns: list[sql_identifier.SqlIdentifier],
+        id_columns: list[sql_identifier.SqlIdentifier],
     ) -> None:
         """Ensures all columns exist in the source table.
@@ -221,11 +221,11 @@ class ModelMonitorSQLClient:
         source_schema: Optional[sql_identifier.SqlIdentifier],
         source: sql_identifier.SqlIdentifier,
         timestamp_column: sql_identifier.SqlIdentifier,
-        prediction_score_columns: List[sql_identifier.SqlIdentifier],
-        prediction_class_columns: List[sql_identifier.SqlIdentifier],
-        actual_score_columns: List[sql_identifier.SqlIdentifier],
-        actual_class_columns: List[sql_identifier.SqlIdentifier],
-        id_columns: List[sql_identifier.SqlIdentifier],
+        prediction_score_columns: list[sql_identifier.SqlIdentifier],
+        prediction_class_columns: list[sql_identifier.SqlIdentifier],
+        actual_score_columns: list[sql_identifier.SqlIdentifier],
+        actual_class_columns: list[sql_identifier.SqlIdentifier],
+        id_columns: list[sql_identifier.SqlIdentifier],
     ) -> None:
         source_database = source_database or self._database_name
         source_schema = source_schema or self._schema_name
@@ -250,7 +250,7 @@ class ModelMonitorSQLClient:
         self,
         operation: str,
         monitor_name: sql_identifier.SqlIdentifier,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         if operation not in {"SUSPEND", "RESUME"}:
             raise ValueError(f"Operation {operation} not supported for altering Dynamic Tables")
@@ -263,7 +263,7 @@ class ModelMonitorSQLClient:
     def suspend_monitor(
         self,
         monitor_name: sql_identifier.SqlIdentifier,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         self._alter_monitor(
             operation="SUSPEND",
@@ -274,7 +274,7 @@ class ModelMonitorSQLClient:
     def resume_monitor(
         self,
         monitor_name: sql_identifier.SqlIdentifier,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         self._alter_monitor(
             operation="RESUME",

snowflake/ml/monitoring/_manager/model_monitor_manager.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 from snowflake import snowpark
 from snowflake.ml._internal.utils import sql_identifier
@@ -20,7 +20,7 @@ class ModelMonitorManager:
         database_name: sql_identifier.SqlIdentifier,
         schema_name: sql_identifier.SqlIdentifier,
         *,
-        statement_params: Optional[Dict[str, Any]] = None,
+        statement_params: Optional[dict[str, Any]] = None,
     ) -> None:
         """
         Opens a ModelMonitorManager for a given database and schema.
@@ -64,7 +64,7 @@ class ModelMonitorManager:
             f"Found: {existing_target_methods}."
         )
-    def _build_column_list_from_input(self, columns: Optional[List[str]]) -> List[sql_identifier.SqlIdentifier]:
+    def _build_column_list_from_input(self, columns: Optional[list[str]]) -> list[sql_identifier.SqlIdentifier]:
         return [sql_identifier.SqlIdentifier(column_name) for column_name in columns] if columns else []
     def add_monitor(
@@ -172,7 +172,7 @@ class ModelMonitorManager:
         """
         rows = self._model_monitor_client.show_model_monitors(statement_params=self.statement_params)
-        def model_match_fn(model_details: Dict[str, str]) -> bool:
+        def model_match_fn(model_details: dict[str, str]) -> bool:
             return (
                 model_details[model_monitor_sql_client.MODEL_JSON_MODEL_NAME_FIELD] == model_version.model_name
                 and model_details[model_monitor_sql_client.MODEL_JSON_VERSION_NAME_FIELD] == model_version.version_name
@@ -215,7 +215,7 @@ class ModelMonitorManager:
             name=monitor_name_id,
         )
-    def show_model_monitors(self) -> List[snowpark.Row]:
+    def show_model_monitors(self) -> list[snowpark.Row]:
         """Show all model monitors in the registry.
         Returns:

snowflake/ml/monitoring/entities/model_monitor_config.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Optional
 from snowflake.ml.model._client.model import model_version_impl
@@ -14,20 +14,20 @@ class ModelMonitorSourceConfig:
     timestamp_column: str
     """Name of column in the source containing timestamp."""
-    id_columns: List[str]
+    id_columns: list[str]
     """List of columns in the source containing unique identifiers."""
-    prediction_score_columns: Optional[List[str]] = None
+    prediction_score_columns: Optional[list[str]] = None
     """List of columns in the source containing prediction scores.
     Can be regression scores for regression models and probability scores for classification models."""
-    prediction_class_columns: Optional[List[str]] = None
+    prediction_class_columns: Optional[list[str]] = None
     """List of columns in the source containing prediction classes for classification models."""
-    actual_score_columns: Optional[List[str]] = None
+    actual_score_columns: Optional[list[str]] = None
     """List of columns in the source containing actual scores."""
-    actual_class_columns: Optional[List[str]] = None
+    actual_class_columns: Optional[list[str]] = None
     """List of columns in the source containing actual classes for classification models."""
     baseline: Optional[str] = None

snowflake/ml/monitoring/explain_visualize.py ADDED Viewed

@@ -0,0 +1,286 @@
+from typing import Union, cast, overload
+import altair as alt
+import numpy as np
+import pandas as pd
+import snowflake.snowpark.dataframe as sp_df
+from snowflake import snowpark
+from snowflake.ml.model import model_signature, type_hints
+from snowflake.ml.model._signatures import snowpark_handler
+@overload
+def plot_force(
+    shap_row: snowpark.Row,
+    features_row: snowpark.Row,
+    base_value: float = 0.0,
+    figsize: tuple[float, float] = (600, 200),
+    contribution_threshold: float = 0.05,
+) -> alt.LayerChart:
+    ...
+@overload
+def plot_force(
+    shap_row: pd.Series,
+    features_row: pd.Series,
+    base_value: float = 0.0,
+    figsize: tuple[float, float] = (600, 200),
+    contribution_threshold: float = 0.05,
+) -> alt.LayerChart:
+    ...
+def plot_force(
+    shap_row: Union[pd.Series, snowpark.Row],
+    features_row: Union[pd.Series, snowpark.Row],
+    base_value: float = 0.0,
+    figsize: tuple[float, float] = (600, 200),
+    contribution_threshold: float = 0.05,
+) -> alt.LayerChart:
+    """
+    Create a force plot for SHAP values with stacked bars based on influence direction.
+    SHAP values and feature values are paired by position: the i-th entry of ``shap_row`` is
+    plotted against the i-th entry of ``features_row``, and the feature names are taken from
+    ``features_row``. Bars are laid out around the sum of the SHAP values, largest absolute
+    influence first; ``base_value`` only positions the dashed vertical rule.
+    Args:
+        shap_row: pandas Series or snowpark Row containing SHAP values for a specific instance
+        features_row: pandas Series or snowpark Row containing the feature values for the same instance
+        base_value: base value of the predictions. Defaults to 0, but is usually the model's average prediction
+        figsize: tuple of (width, height) for the plot
+        contribution_threshold:
+            Only features with magnitude greater than contribution_threshold as a percentage of the
+            total absolute SHAP values will be plotted. Defaults to 0.05 (5%)
+    Returns:
+        Altair chart object
+    """
+    # Normalise snowpark Rows to pandas Series so the rest of the function has one code path.
+    if isinstance(shap_row, snowpark.Row):
+        shap_row = pd.Series(shap_row.as_dict())
+    if isinstance(features_row, snowpark.Row):
+        features_row = pd.Series(features_row.as_dict())
+    # Create a dataframe for plotting
+    positive_label = "Positive"
+    negative_label = "Negative"
+    plot_df = pd.DataFrame(
+        [
+            {
+                "feature": feature,
+                "feature_value": features_row.iloc[index],
+                "feature_annotated": f"{feature}: {features_row.iloc[index]}",
+                "influence_value": shap_row.iloc[index],
+                "bar_direction": positive_label if shap_row.iloc[index] >= 0 else negative_label,
+            }
+            for index, feature in enumerate(features_row.index)
+        ]
+    )
+    # Calculate cumulative positions for the stacked bars.
+    # Both stacks start at the sum of the SHAP values: positive bars grow towards smaller x,
+    # negative bars grow towards larger x.
+    shap_sum = np.sum(shap_row)
+    current_position_pos = shap_sum
+    current_position_neg = shap_sum
+    positions = []
+    total_abs_value_sum = np.sum(plot_df["influence_value"].abs())
+    max_abs_value = plot_df["influence_value"].abs().max()
+    spacing = max_abs_value * 0.07  # Use 7% of the largest absolute influence as spacing between bars
+    # Sort by absolute value to have largest impacts first
+    plot_df = plot_df.reindex(plot_df["influence_value"].abs().sort_values(ascending=False).index)
+    for _, row in plot_df.iterrows():
+        # Skip features with small contributions
+        row_influence_value = row["influence_value"]
+        if abs(row_influence_value) / total_abs_value_sum < contribution_threshold:
+            continue
+        if row_influence_value >= 0:
+            start = current_position_pos - spacing
+            end = current_position_pos - row_influence_value
+            current_position_pos = end
+        else:
+            start = current_position_neg + spacing
+            end = current_position_neg + abs(row_influence_value)
+            current_position_neg = end
+        positions.append(
+            {
+                "start": start,
+                "end": end,
+                "avg": (start + end) / 2,
+                "influence_value": row_influence_value,
+                "influence_annotated": f"Influence: {row_influence_value}",
+                "feature_value": row["feature_value"],
+                "feature_annotated": row["feature_annotated"],
+                "bar_direction": row["bar_direction"],
+            }
+        )
+    position_df = pd.DataFrame(positions)
+    # Create force plot using Altair
+    blue_color = "#1f77b4"
+    red_color = "#d62728"
+    width, height = figsize
+    # One direction -> colour mapping for every layer: positive influence is red, negative is blue.
+    direction_domain = [positive_label, negative_label]
+    direction_colors = [red_color, blue_color]
+    bars: alt.Chart = (
+        alt.Chart(position_df)
+        .mark_bar(size=10)
+        .encode(
+            x=alt.X("start:Q", title="Feature Impact"),
+            x2=alt.X2("end:Q"),
+            color=alt.Color(
+                "bar_direction:N",
+                scale=alt.Scale(domain=direction_domain, range=direction_colors),
+                legend=alt.Legend(title="Influence Direction"),
+            ),
+            tooltip=["influence_value", "feature_value"],
+        )
+        .properties(title="Feature Influence (SHAP values)", width=width, height=height)
+    ).interactive()
+    # Arrow heads sit at each bar's start and are rotated by direction (90 for positive, -90 for negative).
+    # They reuse the bars' colour mapping so a head matches its bar and the layers do not
+    # declare two different ranges for the same colour field.
+    arrow: alt.Chart = (
+        alt.Chart(position_df)
+        .mark_point(shape="triangle", filled=True, fillOpacity=1)
+        .encode(
+            x=alt.X("start:Q"),
+            angle=alt.Angle("bar_direction:N", scale=alt.Scale(domain=direction_domain, range=[90, -90])),
+            color=alt.Color("bar_direction:N", scale=alt.Scale(domain=direction_domain, range=direction_colors)),
+            size=alt.SizeValue(300),
+            tooltip=alt.value(None),
+        )
+    )
+    # Add a vertical line at the base value
+    zero_line: alt.Chart = alt.Chart(pd.DataFrame({"x": [base_value]})).mark_rule(strokeDash=[3, 3]).encode(x="x:Q")
+    # Add text labels on each bar
+    feature_labels = (
+        alt.Chart(position_df)
+        .mark_text(align="center", baseline="line-bottom", dy=30, fontSize=11)
+        .encode(
+            x=alt.X("avg:Q"),
+            text=alt.Text("feature_annotated:N"),  # "<feature>: <value>" label, centred on the bar
+            color=alt.value("grey"),  # Same label colour for both directions
+            tooltip=["feature_value"],
+        )
+    )
+    return cast(alt.LayerChart, bars + feature_labels + zero_line + arrow)
+def plot_influence_sensitivity(
+    feature_values: pd.Series, shap_values: pd.Series, figsize: tuple[float, float] = (600, 400)
+) -> alt.Chart:
+    """
+    Build a scatter plot of SHAP value against feature value for one feature.
+    The feature is drawn on a nominal (categorical) x axis with random horizontal jitter and
+    per-value colouring when any single value occurs more than 10 times; otherwise it is drawn
+    on a quantitative x axis.
+    Args:
+        feature_values: pandas Series containing the feature values for a specific feature
+        shap_values: pandas Series containing the SHAP values for the same feature
+        figsize: tuple of (width, height) for the plot
+    Returns:
+        Altair chart object
+    """
+    raw_values = feature_values.values
+    distinct_values = np.sort(np.unique(raw_values))
+    # Map every point to the slot of its distinct value, then count the points per slot.
+    occurrences = np.bincount(np.searchsorted(distinct_values, raw_values))
+    largest_group = float(np.max(occurrences))
+    average_group = len(raw_values) / len(distinct_values)
+    is_categorical = float(max(largest_group, average_group)) > 10
+    if is_categorical:
+        x_encodings = {
+            "x": alt.X("feature_value:N", title="Feature Value"),
+            "color": alt.Color("feature_value:N").legend(None),
+            "xOffset": "jitter:Q",
+        }
+    else:
+        x_encodings = {"x": alt.X("feature_value:Q", title="Feature Value")}
+    # Data backing the chart: one row per observation.
+    plot_df = pd.DataFrame({"feature_value": feature_values, "shap_value": shap_values})
+    width, height = figsize
+    # The jitter column is always calculated; it is only used by the categorical xOffset encoding.
+    jittered = alt.Chart(plot_df).transform_calculate(jitter="random()")
+    points = jittered.mark_circle(size=60, opacity=0.7)
+    encoded = points.encode(
+        y=alt.Y("shap_value:Q", title="SHAP Value"),
+        tooltip=["feature_value", "shap_value"],
+        **x_encodings,
+    )
+    scatter = encoded.properties(title="SHAP Dependence Scatter Plot", width=width, height=height)
+    return cast(alt.Chart, scatter)
+def plot_violin(
+    shap_df: type_hints.SupportedDataType,
+    feature_df: type_hints.SupportedDataType,
+    figsize: tuple[float, float] = (600, 200),
+) -> alt.Chart:
+    """
+    Draw one violin (mirrored density area) per feature from its SHAP value distribution.
+    Both inputs are converted to pandas DataFrames first. Feature names come from the columns
+    of ``feature_df`` and are matched to the columns of ``shap_df`` by position. Rows are
+    ordered by the summed absolute SHAP value of each feature, largest first.
+    Args:
+        shap_df: 2D array containing SHAP values for multiple features
+        feature_df: 2D array containing the corresponding feature values
+        figsize: tuple of (width, height) for the plot
+    Returns:
+        Altair chart object
+    """
+    shap_frame = _convert_to_pandas_df(shap_df)
+    feature_frame = _convert_to_pandas_df(feature_df)
+    # Both converted frames must be two-dimensional.
+    assert len(shap_frame.shape) == 2, f"shap_df must be 2D, but got shape {shap_frame.shape}"
+    assert len(feature_frame.shape) == 2, f"feature_df must be 2D, but got shape {feature_frame.shape}"
+    # Long-format data: each feature name repeated once per SHAP row, next to that column's values.
+    row_count = shap_frame.shape[0]
+    repeated_names = feature_frame.columns.repeat(row_count)
+    stacked_shap = shap_frame.transpose().values.flatten()
+    plot_data = pd.DataFrame({"feature_name": repeated_names, "shap_value": stacked_shap})
+    # Rank SHAP columns by total absolute value, then translate each to its feature name by position.
+    ranked_shap_columns = shap_frame.abs().sum(axis=0).sort_values(ascending=False).index
+    column_sort_order = []
+    for shap_column in ranked_shap_columns:
+        column_position = shap_frame.columns.get_loc(shap_column)
+        column_sort_order.append(feature_frame.columns[column_position])
+    # Assemble the chart step by step.
+    width, height = figsize
+    density = alt.Chart(plot_data).transform_density(
+        density="shap_value", groupby=["feature_name"], as_=["shap_value", "density"]
+    )
+    areas = density.mark_area(orient="vertical")
+    encoded = areas.encode(
+        y=alt.Y("density:Q", title=None).stack("center").impute(None).axis(labels=False, grid=False, ticks=True),
+        x=alt.X("shap_value:Q", title="SHAP Value"),
+        row=alt.Row("feature_name:N", sort=column_sort_order).spacing(0),
+        color=alt.Color("feature_name:N", legend=None),
+        tooltip=["feature_name", "shap_value"],
+    )
+    violin = encoded.properties(width=width, height=height).interactive()
+    return cast(alt.Chart, violin)
+def _convert_to_pandas_df(
+    data: type_hints.SupportedDataType,
+) -> pd.DataFrame:
+    """Convert supported input data to a pandas DataFrame.
+    Snowpark DataFrames go through the Snowpark handler; every other input is passed to the
+    local-data conversion helper of ``model_signature``.
+    """
+    if not isinstance(data, sp_df.DataFrame):
+        return model_signature._convert_local_data_to_df(data)
+    return snowpark_handler.SnowparkDataFrameHandler.convert_to_df(data)

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl