validmind 2.7.5__py3-none-any.whl → 2.7.7__py3-none-any.whl

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (149)
  1. validmind/__init__.py +2 -0
  2. validmind/__version__.py +1 -1
  3. validmind/api_client.py +8 -1
  4. validmind/datasets/credit_risk/lending_club.py +352 -87
  5. validmind/html_templates/content_blocks.py +1 -1
  6. validmind/tests/__types__.py +17 -0
  7. validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
  8. validmind/tests/data_validation/AutoMA.py +2 -2
  9. validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
  10. validmind/tests/data_validation/BoxPierce.py +2 -2
  11. validmind/tests/data_validation/ClassImbalance.py +2 -1
  12. validmind/tests/data_validation/DatasetDescription.py +11 -2
  13. validmind/tests/data_validation/DatasetSplit.py +2 -2
  14. validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
  15. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
  16. validmind/tests/data_validation/HighCardinality.py +9 -2
  17. validmind/tests/data_validation/HighPearsonCorrelation.py +18 -4
  18. validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
  19. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
  20. validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
  21. validmind/tests/data_validation/MutualInformation.py +6 -8
  22. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
  23. validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
  24. validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
  25. validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
  26. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
  27. validmind/tests/data_validation/RollingStatsPlot.py +21 -10
  28. validmind/tests/data_validation/ScatterPlot.py +3 -5
  29. validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
  30. validmind/tests/data_validation/SeasonalDecompose.py +12 -2
  31. validmind/tests/data_validation/Skewness.py +6 -3
  32. validmind/tests/data_validation/SpreadPlot.py +8 -3
  33. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
  34. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
  35. validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
  36. validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
  37. validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
  38. validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
  39. validmind/tests/data_validation/WOEBinPlots.py +2 -2
  40. validmind/tests/data_validation/WOEBinTable.py +11 -9
  41. validmind/tests/data_validation/nlp/CommonWords.py +2 -2
  42. validmind/tests/data_validation/nlp/Hashtags.py +2 -2
  43. validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
  44. validmind/tests/data_validation/nlp/Mentions.py +9 -6
  45. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
  46. validmind/tests/data_validation/nlp/Punctuations.py +4 -2
  47. validmind/tests/data_validation/nlp/Sentiment.py +2 -2
  48. validmind/tests/data_validation/nlp/StopWords.py +5 -4
  49. validmind/tests/data_validation/nlp/TextDescription.py +2 -2
  50. validmind/tests/data_validation/nlp/Toxicity.py +2 -2
  51. validmind/tests/model_validation/BertScore.py +2 -2
  52. validmind/tests/model_validation/BleuScore.py +2 -2
  53. validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
  54. validmind/tests/model_validation/ContextualRecall.py +2 -2
  55. validmind/tests/model_validation/FeaturesAUC.py +2 -2
  56. validmind/tests/model_validation/MeteorScore.py +2 -2
  57. validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
  58. validmind/tests/model_validation/RegardScore.py +6 -2
  59. validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
  60. validmind/tests/model_validation/RougeScore.py +6 -5
  61. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
  62. validmind/tests/model_validation/TokenDisparity.py +2 -2
  63. validmind/tests/model_validation/ToxicityScore.py +10 -2
  64. validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
  65. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
  66. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
  67. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
  68. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
  69. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
  70. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
  71. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
  72. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
  73. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
  74. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
  75. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
  76. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
  77. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
  78. validmind/tests/model_validation/embeddings/utils.py +11 -1
  79. validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
  80. validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
  81. validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
  82. validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
  83. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
  84. validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
  85. validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
  86. validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
  87. validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
  88. validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
  89. validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
  90. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
  91. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
  92. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
  93. validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
  94. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
  95. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
  96. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
  97. validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
  98. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
  99. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
  100. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
  101. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
  102. validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
  103. validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
  104. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
  105. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
  106. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
  107. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
  108. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
  109. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
  110. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
  111. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
  112. validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
  113. validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
  114. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
  115. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
  116. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
  117. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
  118. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
  119. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
  120. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +220 -0
  121. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +155 -0
  122. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +146 -0
  123. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +148 -0
  124. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +193 -0
  125. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +178 -0
  126. validmind/tests/ongoing_monitoring/FeatureDrift.py +120 -120
  127. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +18 -23
  128. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +86 -44
  129. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +204 -0
  130. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +98 -0
  131. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +150 -0
  132. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +212 -0
  133. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +209 -0
  134. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +91 -13
  135. validmind/tests/prompt_validation/Bias.py +13 -9
  136. validmind/tests/prompt_validation/Clarity.py +13 -9
  137. validmind/tests/prompt_validation/Conciseness.py +13 -9
  138. validmind/tests/prompt_validation/Delimitation.py +13 -9
  139. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  140. validmind/tests/prompt_validation/Robustness.py +6 -2
  141. validmind/tests/prompt_validation/Specificity.py +13 -9
  142. validmind/tests/run.py +6 -0
  143. validmind/utils.py +7 -8
  144. validmind/vm_models/dataset/dataset.py +0 -4
  145. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/METADATA +2 -3
  146. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/RECORD +149 -138
  147. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/WHEEL +1 -1
  148. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/LICENSE +0 -0
  149. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/entry_points.txt +0 -0
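The most substantial additions are the new drift tests under validmind/tests/ongoing_monitoring/ (entries 120-134). As a minimal sketch of how one of them might be invoked, the snippet below assumes the standard vm.tests.run_test entry point and that vm_reference, vm_monitoring, and vm_model were created beforehand with vm.init_dataset / vm.init_model; those variable names do not appear in the diff itself.

import validmind as vm

# Assumes vm.init(...) has already been called and that vm_reference,
# vm_monitoring (VMDataset) and vm_model (VMModel) exist.
result = vm.tests.run_test(
    "validmind.ongoing_monitoring.ScoreBandsDrift",
    inputs={
        "datasets": [vm_reference, vm_monitoring],  # reference first, monitoring second
        "model": vm_model,
    },
    params={
        "score_column": "score",
        "score_bands": [410, 440, 470],
        "drift_threshold": 20.0,
    },
)
result.log()  # push the result tables to the ValidMind platform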
validmind/tests/ongoing_monitoring/ScoreBandsDrift.py
@@ -0,0 +1,212 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+ import pandas as pd
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tags("visualization", "credit_risk", "scorecard")
+ @tasks("classification")
+ def ScoreBandsDrift(
+     datasets: List[VMDataset],
+     model: VMModel,
+     score_column: str = "score",
+     score_bands: list = None,
+     drift_threshold: float = 20.0,
+ ):
+     """
+     Analyzes drift in population distribution and default rates across score bands.
+
+     ### Purpose
+
+     The Score Bands Drift test is designed to evaluate changes in score-based risk segmentation
+     over time. By comparing population distribution and default rates across score bands between
+     reference and monitoring datasets, this test helps identify whether the model's risk
+     stratification remains stable in production. This is crucial for understanding if the model's
+     scoring behavior maintains its intended risk separation and whether specific score ranges
+     have experienced significant shifts.
+
+     ### Test Mechanism
+
+     This test proceeds by segmenting scores into predefined bands and analyzing three key metrics
+     across these bands: population distribution, predicted default rates, and observed default
+     rates. For each band, it computes these metrics for both reference and monitoring datasets
+     and quantifies drift as percentage changes. The test provides both detailed band-by-band
+     comparisons and overall stability assessment, with special attention to bands showing
+     significant drift.
+
+     ### Signs of High Risk
+
+     - Large shifts in population distribution across bands
+     - Significant changes in default rates within bands
+     - Inconsistent drift patterns between adjacent bands
+     - Divergence between predicted and observed rates
+     - Systematic shifts in risk concentration
+     - Empty or sparse score bands in monitoring data
+
+     ### Strengths
+
+     - Provides comprehensive view of score-based drift
+     - Identifies specific score ranges with instability
+     - Enables comparison of multiple risk metrics
+     - Includes both distribution and performance drift
+     - Supports business-relevant score segmentation
+     - Maintains interpretable drift thresholds
+
+     ### Limitations
+
+     - Sensitive to choice of score band boundaries
+     - Requires sufficient samples in each band
+     - Cannot suggest optimal band adjustments
+     - May not capture within-band distribution changes
+     - Limited to predefined scoring metrics
+     - Complex interpretation with multiple drift signals
+     """
+     # Validate score column
+     if score_column not in datasets[0].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in reference dataset"
+         )
+     if score_column not in datasets[1].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in monitoring dataset"
+         )
+
+     # Default score bands if none provided
+     if score_bands is None:
+         score_bands = [410, 440, 470]
+
+     # Create band labels
+     band_labels = [
+         f"{score_bands[i]}-{score_bands[i+1]}" for i in range(len(score_bands) - 1)
+     ]
+     band_labels.insert(0, f"<{score_bands[0]}")
+     band_labels.append(f">{score_bands[-1]}")
+
+     # Process reference and monitoring datasets
+     def process_dataset(dataset, model):
+         df = dataset.df.copy()
+         df["score_band"] = pd.cut(
+             df[score_column],
+             bins=[-np.inf] + score_bands + [np.inf],
+             labels=band_labels,
+         )
+         y_pred = dataset.y_pred(model)
+
+         results = {}
+         total_population = len(df)
+
+         # Store min and max scores
+         min_score = df[score_column].min()
+         max_score = df[score_column].max()
+
+         for band in band_labels:
+             band_mask = df["score_band"] == band
+             population = band_mask.sum()
+
+             results[band] = {
+                 "Population (%)": population / total_population * 100,
+                 "Predicted Default Rate (%)": (
+                     y_pred[band_mask].sum() / population * 100 if population > 0 else 0
+                 ),
+                 "Observed Default Rate (%)": (
+                     df[band_mask][dataset.target_column].sum() / population * 100
+                     if population > 0
+                     else 0
+                 ),
+             }
+
+         results["min_score"] = min_score
+         results["max_score"] = max_score
+         return results
+
+     # Get metrics for both datasets
+     ref_results = process_dataset(datasets[0], model)
+     mon_results = process_dataset(datasets[1], model)
+
+     # Create the three comparison tables
+     tables = {}
+     all_passed = True
+
+     metrics = [
+         ("Population Distribution (%)", "Population (%)"),
+         ("Predicted Default Rates (%)", "Predicted Default Rate (%)"),
+         ("Observed Default Rates (%)", "Observed Default Rate (%)"),
+     ]
+
+     for table_name, metric in metrics:
+         rows = []
+         metric_passed = True
+
+         for band in band_labels:
+             ref_val = ref_results[band][metric]
+             mon_val = mon_results[band][metric]
+
+             # Calculate drift - using absolute difference when reference is 0
+             drift = (
+                 abs(mon_val - ref_val)
+                 if ref_val == 0
+                 else ((mon_val - ref_val) / abs(ref_val)) * 100
+             )
+             passed = abs(drift) < drift_threshold
+             metric_passed &= passed
+
+             rows.append(
+                 {
+                     "Score Band": band,
+                     "Reference": round(ref_val, 4),
+                     "Monitoring": round(mon_val, 4),
+                     "Drift (%)": round(drift, 2),
+                     "Pass/Fail": "Pass" if passed else "Fail",
+                 }
+             )
+
+         # Add total row for all metrics
+         if metric == "Population (%)":
+             ref_total = 100.0
+             mon_total = 100.0
+             drift_total = 0.0
+             passed_total = True
+         else:
+             ref_total = sum(
+                 ref_results[band][metric] * (ref_results[band]["Population (%)"] / 100)
+                 for band in band_labels
+             )
+             mon_total = sum(
+                 mon_results[band][metric] * (mon_results[band]["Population (%)"] / 100)
+                 for band in band_labels
+             )
+             # Apply same drift calculation to totals
+             drift_total = (
+                 abs(mon_total - ref_total)
+                 if ref_total == 0
+                 else ((mon_total - ref_total) / abs(ref_total)) * 100
+             )
+             passed_total = abs(drift_total) < drift_threshold
+
+         # Format total row with score ranges
+         total_label = (
+             f"Total ({ref_results['min_score']:.0f}-{ref_results['max_score']:.0f})"
+         )
+
+         rows.append(
+             {
+                 "Score Band": total_label,
+                 "Reference": round(ref_total, 4),
+                 "Monitoring": round(mon_total, 4),
+                 "Drift (%)": round(drift_total, 2),
+                 "Pass/Fail": "Pass" if passed_total else "Fail",
+             }
+         )
+
+         metric_passed &= passed_total
+         tables[table_name] = pd.DataFrame(rows)
+         all_passed &= metric_passed
+
+     return tables, all_passed
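The drift formula above is a relative percentage change, falling back to an absolute difference when the reference value is zero. A small standalone sketch of the arithmetic, using hypothetical band values that do not come from the package:

# Hypothetical reference/monitoring values for one band, for illustration only.
ref_val, mon_val, drift_threshold = 25.0, 31.0, 20.0

drift = (
    abs(mon_val - ref_val)
    if ref_val == 0
    else ((mon_val - ref_val) / abs(ref_val)) * 100
)
print(round(drift, 2))  # 24.0, i.e. a 24% change relative to the reference band
print("Pass" if abs(drift) < drift_threshold else "Fail")  # Fail, since 24 >= 20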
validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py
@@ -0,0 +1,209 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+ import pandas as pd
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ from scipy import stats
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset
+
+
+ @tags("visualization", "credit_risk", "logistic_regression")
+ @tasks("classification")
+ def ScorecardHistogramDrift(
+     datasets: List[VMDataset],
+     score_column: str = "score",
+     title: str = "Scorecard Histogram Drift",
+     drift_pct_threshold: float = 20.0,
+ ):
+     """
+     Compares score distributions between reference and monitoring datasets for each class.
+
+     ### Purpose
+
+     The Scorecard Histogram Drift test is designed to evaluate changes in the model's scoring
+     patterns over time. By comparing score distributions between reference and monitoring datasets
+     for each class, this test helps identify whether the model's scoring behavior remains stable
+     in production. This is crucial for understanding if the model's risk assessment maintains
+     consistent patterns and whether specific score ranges have experienced significant shifts
+     in their distribution.
+
+     ### Test Mechanism
+
+     This test proceeds by generating histograms of scores for each class in both reference and
+     monitoring datasets. It analyzes distribution characteristics through multiple statistical
+     moments: mean, variance, skewness, and kurtosis. The test quantifies drift as percentage
+     changes in these moments between datasets, providing both visual and numerical assessments
+     of distribution stability. Special attention is paid to class-specific distribution changes.
+
+     ### Signs of High Risk
+
+     - Significant shifts in score distribution shapes
+     - Large drifts in distribution moments exceeding threshold
+     - Changes in the relative positioning of class distributions
+     - Appearance of new modes or peaks in monitoring data
+     - Unexpected changes in score spread or concentration
+     - Systematic shifts in class-specific scoring patterns
+
+     ### Strengths
+
+     - Provides class-specific distribution analysis
+     - Identifies detailed changes in scoring patterns
+     - Enables visual comparison of distributions
+     - Includes comprehensive moment analysis
+     - Supports multiple class evaluation
+     - Maintains interpretable score scale
+
+     ### Limitations
+
+     - Sensitive to binning choices in visualization
+     - Requires sufficient samples per class
+     - Cannot suggest score adjustments
+     - May not capture subtle distribution changes
+     - Complex interpretation with multiple classes
+     - Limited to univariate score analysis
+     """
+     # Verify score column exists
+     if score_column not in datasets[0].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in reference dataset"
+         )
+     if score_column not in datasets[1].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in monitoring dataset"
+         )
+
+     # Get reference and monitoring data
+     df_ref = datasets[0].df
+     df_mon = datasets[1].df
+
+     # Get unique classes
+     classes = sorted(df_ref[datasets[0].target_column].unique())
+
+     # Create subplots with more horizontal space for legends
+     fig = make_subplots(
+         rows=len(classes),
+         cols=1,
+         subplot_titles=[f"Class {cls}" for cls in classes],
+         horizontal_spacing=0.15,
+     )
+
+     # Define colors
+     ref_color = "rgba(31, 119, 180, 0.8)"  # Blue with 0.8 opacity
+     mon_color = "rgba(255, 127, 14, 0.8)"  # Orange with 0.8 opacity
+
+     # Dictionary to store tables for each class
+     tables = {}
+     all_passed = True  # Track overall pass/fail
+
+     # Add histograms and create tables for each class
+     for i, class_value in enumerate(classes, start=1):
+         # Get scores for current class
+         ref_scores = df_ref[df_ref[datasets[0].target_column] == class_value][
+             score_column
+         ]
+         mon_scores = df_mon[df_mon[datasets[1].target_column] == class_value][
+             score_column
+         ]
+
+         # Calculate distribution moments
+         ref_stats = {
+             "Mean": np.mean(ref_scores),
+             "Variance": np.var(ref_scores),
+             "Skewness": stats.skew(ref_scores),
+             "Kurtosis": stats.kurtosis(ref_scores),
+         }
+
+         mon_stats = {
+             "Mean": np.mean(mon_scores),
+             "Variance": np.var(mon_scores),
+             "Skewness": stats.skew(mon_scores),
+             "Kurtosis": stats.kurtosis(mon_scores),
+         }
+
+         # Create table for this class
+         table_data = []
+         class_passed = True  # Track pass/fail for this class
+
+         for stat_name in ["Mean", "Variance", "Skewness", "Kurtosis"]:
+             ref_val = ref_stats[stat_name]
+             mon_val = mon_stats[stat_name]
+             drift = (
+                 ((mon_val - ref_val) / abs(ref_val)) * 100 if ref_val != 0 else np.inf
+             )
+             passed = abs(drift) < drift_pct_threshold
+             class_passed &= passed  # Update class pass/fail
+
+             table_data.append(
+                 {
+                     "Statistic": stat_name,
+                     "Reference": round(ref_val, 4),
+                     "Monitoring": round(mon_val, 4),
+                     "Drift (%)": round(drift, 2),
+                     "Pass/Fail": "Pass" if passed else "Fail",
+                 }
+             )
+
+         tables[f"Class {class_value}"] = pd.DataFrame(table_data)
+         all_passed &= class_passed  # Update overall pass/fail
+
+         # Reference dataset histogram
+         fig.add_trace(
+             go.Histogram(
+                 x=ref_scores,
+                 name=f"Reference - Class {class_value}",
+                 marker_color=ref_color,
+                 showlegend=True,
+                 legendrank=i * 2 - 1,
+             ),
+             row=i,
+             col=1,
+         )
+
+         # Monitoring dataset histogram
+         fig.add_trace(
+             go.Histogram(
+                 x=mon_scores,
+                 name=f"Monitoring - Class {class_value}",
+                 marker_color=mon_color,
+                 showlegend=True,
+                 legendrank=i * 2,
+             ),
+             row=i,
+             col=1,
+         )
+
+     # Update layout
+     fig.update_layout(
+         title_text=title,
+         barmode="overlay",
+         height=300 * len(classes),
+         width=1000,
+         showlegend=True,
+     )
+
+     # Update axes labels and add separate legends for each subplot
+     for i in range(len(classes)):
+         fig.update_xaxes(title_text="Score", row=i + 1, col=1)
+         fig.update_yaxes(title_text="Frequency", row=i + 1, col=1)
+
+         # Add separate legend for each subplot
+         fig.update_layout(
+             **{
+                 f'legend{i+1 if i > 0 else ""}': dict(
+                     yanchor="middle",
+                     y=1 - (i / len(classes)) - (0.5 / len(classes)),
+                     xanchor="left",
+                     x=1.05,
+                     tracegroupgap=5,
+                 )
+             }
+         )
+
+     return fig, tables, all_passed
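The per-class tables above reduce each score distribution to four moments and flag any moment whose relative change exceeds the threshold. A self-contained sketch of that comparison on synthetic scores (the numbers below are chosen for illustration and are not from the package):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
ref_scores = rng.normal(450, 30, 5_000)   # synthetic reference scores
mon_scores = rng.normal(465, 36, 5_000)   # synthetic monitoring scores with drift

drift_pct_threshold = 20.0
for name, fn in [("Mean", np.mean), ("Variance", np.var),
                 ("Skewness", stats.skew), ("Kurtosis", stats.kurtosis)]:
    ref_val, mon_val = fn(ref_scores), fn(mon_scores)
    # Same relative-change rule as the test: percentage change vs. the reference moment.
    drift = ((mon_val - ref_val) / abs(ref_val)) * 100 if ref_val != 0 else np.inf
    print(name, round(drift, 2), "Pass" if abs(drift) < drift_pct_threshold else "Fail")

Note that when a reference moment is close to zero (for example, the skewness of a nearly symmetric distribution), the percentage drift can be very large even for benign changes, which is one reason the docstring flags interpretation caveats.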
validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py
@@ -2,15 +2,17 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- import matplotlib.pyplot as plt
- import seaborn as sns
+ import pandas as pd
+ import plotly.figure_factory as ff
+ import plotly.graph_objects as go
+ from scipy.stats import kurtosis, skew
 
  from validmind import tags, tasks
 
 
  @tags("visualization")
  @tasks("monitoring")
- def TargetPredictionDistributionPlot(datasets, model):
+ def TargetPredictionDistributionPlot(datasets, model, drift_pct_threshold=20):
      """
      Assesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify
      potential data drift.
@@ -45,23 +47,99 @@ def TargetPredictionDistributionPlot(datasets, model):
      - Less effective if the differences in distributions are subtle and not easily visible.
      """
 
+     # Get predictions
      pred_ref = datasets[0].y_prob_df(model)
      pred_ref.columns = ["Reference Prediction"]
      pred_monitor = datasets[1].y_prob_df(model)
      pred_monitor.columns = ["Monitoring Prediction"]
 
-     fig = plt.figure()
-     plot = sns.kdeplot(
-         pred_ref["Reference Prediction"], fill=True, label="Reference Prediction"
+     # Calculate distribution moments
+     moments = pd.DataFrame(
+         {
+             "Statistic": ["Mean", "Std", "Skewness", "Kurtosis"],
+             "Reference": [
+                 pred_ref["Reference Prediction"].mean(),
+                 pred_ref["Reference Prediction"].std(),
+                 skew(pred_ref["Reference Prediction"]),
+                 kurtosis(pred_ref["Reference Prediction"]),
+             ],
+             "Monitoring": [
+                 pred_monitor["Monitoring Prediction"].mean(),
+                 pred_monitor["Monitoring Prediction"].std(),
+                 skew(pred_monitor["Monitoring Prediction"]),
+                 kurtosis(pred_monitor["Monitoring Prediction"]),
+             ],
+         }
      )
-     plot = sns.kdeplot(
-         pred_monitor["Monitoring Prediction"], fill=True, label="Monitor Prediction"
+
+     # Calculate drift percentage with direction
+     moments["Drift (%)"] = (
+         (moments["Monitoring"] - moments["Reference"])
+         / moments["Reference"].abs()
+         * 100
+     ).round(2)
+
+     # Add Pass/Fail column based on absolute drift
+     moments["Pass/Fail"] = (
+         moments["Drift (%)"]
+         .abs()
+         .apply(lambda x: "Pass" if x < drift_pct_threshold else "Fail")
+     )
+
+     # Set Statistic as index but keep it as a column
+     moments = moments.set_index("Statistic", drop=False)
+
+     # Create KDE for both distributions
+     ref_kde = ff.create_distplot(
+         [pred_ref["Reference Prediction"].values],
+         ["Reference"],
+         show_hist=False,
+         show_rug=False,
+     )
+     monitor_kde = ff.create_distplot(
+         [pred_monitor["Monitoring Prediction"].values],
+         ["Monitoring"],
+         show_hist=False,
+         show_rug=False,
      )
-     plot.set(
-         xlabel="Prediction", title="Distribution of Reference & Monitor Predictions"
+
+     # Create new figure
+     fig = go.Figure()
+
+     # Add reference distribution
+     fig.add_trace(
+         go.Scatter(
+             x=ref_kde.data[0].x,
+             y=ref_kde.data[0].y,
+             fill="tozeroy",
+             name="Reference Prediction",
+             line=dict(color="blue", width=2),
+             opacity=0.6,
+         )
+     )
+
+     # Add monitoring distribution
+     fig.add_trace(
+         go.Scatter(
+             x=monitor_kde.data[0].x,
+             y=monitor_kde.data[0].y,
+             fill="tozeroy",
+             name="Monitor Prediction",
+             line=dict(color="red", width=2),
+             opacity=0.6,
+         )
+     )
+
+     # Update layout
+     fig.update_layout(
+         title="Distribution of Reference & Monitor Predictions",
+         xaxis_title="Prediction",
+         yaxis_title="Density",
+         showlegend=True,
+         template="plotly_white",
+         hovermode="x unified",
      )
-     plot.legend()
 
-     plt.close()
+     pass_fail_bool = (moments["Pass/Fail"] == "Pass").all()
 
-     return fig
+     return ({"Distribution Moments": moments}, fig, pass_fail_bool)
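The rewrite replaces the seaborn KDE plot with Plotly traces built from ff.create_distplot. A stripped-down sketch of that pattern on synthetic prediction probabilities (variable names and data are hypothetical, not taken from the package):

import numpy as np
import plotly.figure_factory as ff
import plotly.graph_objects as go

rng = np.random.default_rng(1)
ref_probs = rng.beta(2, 5, 1_000)   # synthetic reference predictions
mon_probs = rng.beta(2, 4, 1_000)   # synthetic monitoring predictions

fig = go.Figure()
for label, values, color in [("Reference", ref_probs, "blue"),
                             ("Monitoring", mon_probs, "red")]:
    # create_distplot is only used to compute the KDE curve; its first trace
    # holds the x/y arrays that we re-plot as a filled Scatter trace.
    kde = ff.create_distplot([values], [label], show_hist=False, show_rug=False)
    fig.add_trace(
        go.Scatter(x=kde.data[0].x, y=kde.data[0].y, fill="tozeroy",
                   name=label, line=dict(color=color, width=2), opacity=0.6)
    )
fig.update_layout(xaxis_title="Prediction", yaxis_title="Density",
                  template="plotly_white")
fig.show()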
validmind/tests/prompt_validation/Bias.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -107,11 +107,15 @@ def Bias(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Explanation": explanation,
-             "Threshold": min_threshold,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Explanation": explanation,
+                 "Threshold": min_threshold,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
validmind/tests/prompt_validation/Clarity.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -96,11 +96,15 @@ def Clarity(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Explanation": explanation,
-             "Threshold": min_threshold,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Explanation": explanation,
+                 "Threshold": min_threshold,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
validmind/tests/prompt_validation/Conciseness.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -103,11 +103,15 @@ def Conciseness(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Threshold": min_threshold,
-             "Explanation": explanation,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Threshold": min_threshold,
+                 "Explanation": explanation,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
validmind/tests/prompt_validation/Delimitation.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -89,11 +89,15 @@ def Delimitation(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Threshold": min_threshold,
-             "Explanation": explanation,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Threshold": min_threshold,
+                 "Explanation": explanation,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
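Across these prompt-validation tests the only functional change is the third return element, RawData(response=response), which keeps the raw LLM response alongside the summary table and the pass/fail flag. A hedged sketch of a test following the same return shape; the test name, tags, task label, and scoring stub below are hypothetical and only mirror the pattern shown in the diffs above:

from validmind import RawData, tags, tasks

@tags("example")
@tasks("text_qa")
def MyCustomCheck(model, min_threshold=7):
    # Hypothetical scoring step; a real test would call the model / grader here.
    score, explanation, response = 8, "Prompt is clear and specific.", "raw LLM text"
    passed = score > min_threshold
    return (
        [{"Score": score, "Threshold": min_threshold,
          "Explanation": explanation, "Pass/Fail": "Pass" if passed else "Fail"}],
        passed,
        RawData(response=response),  # preserved for later inspection, as in 2.7.7
    )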