validmind 2.6.10__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. validmind/__init__.py +2 -0
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +20 -4
  4. validmind/ai/test_result_description/user.jinja +5 -0
  5. validmind/datasets/credit_risk/lending_club.py +444 -14
  6. validmind/tests/data_validation/MutualInformation.py +129 -0
  7. validmind/tests/data_validation/ScoreBandDefaultRates.py +139 -0
  8. validmind/tests/data_validation/TooManyZeroValues.py +6 -5
  9. validmind/tests/data_validation/UniqueRows.py +3 -1
  10. validmind/tests/decorator.py +18 -16
  11. validmind/tests/model_validation/sklearn/CalibrationCurve.py +116 -0
  12. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +261 -0
  13. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +1 -0
  14. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +144 -56
  15. validmind/tests/model_validation/sklearn/ModelParameters.py +74 -0
  16. validmind/tests/model_validation/sklearn/ROCCurve.py +26 -23
  17. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +130 -0
  18. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -6
  19. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -3
  20. validmind/tests/output.py +10 -1
  21. validmind/tests/run.py +52 -54
  22. validmind/utils.py +34 -7
  23. validmind/vm_models/figure.py +15 -0
  24. validmind/vm_models/result/__init__.py +2 -2
  25. validmind/vm_models/result/result.py +136 -23
  26. {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/METADATA +1 -1
  27. {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/RECORD +30 -24
  28. {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/LICENSE +0 -0
  29. {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/WHEEL +0 -0
  30. {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/entry_points.txt +0 -0
validmind/tests/data_validation/MutualInformation.py (new file)
@@ -0,0 +1,129 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import plotly.graph_objects as go
+ from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset
+ from validmind.vm_models.result import RawData
+
+
+ @tags("feature_selection", "data_analysis")
+ @tasks("classification", "regression")
+ def MutualInformation(
+     dataset: VMDataset, min_threshold: float = 0.01, task: str = "classification"
+ ):
+     """
+     Calculates mutual information scores between features and target variable to evaluate feature relevance.
+
+     ### Purpose
+
+     The Mutual Information test quantifies the predictive power of each feature by measuring its statistical
+     dependency with the target variable. This helps identify relevant features for model training and
+     detect potential redundant or irrelevant variables, supporting feature selection decisions and model
+     interpretability.
+
+     ### Test Mechanism
+
+     The test employs sklearn's mutual_info_classif/mutual_info_regression functions to compute mutual
+     information between each feature and the target. It produces a normalized score (0 to 1) for each
+     feature, where higher scores indicate stronger relationships. Results are presented in both tabular
+     format and visualized through a bar plot with a configurable threshold line.
+
+     ### Signs of High Risk
+
+     - Many features showing very low mutual information scores
+     - Key business features exhibiting unexpectedly low scores
+     - All features showing similar, low information content
+     - Large discrepancy between business importance and MI scores
+     - Highly skewed distribution of MI scores
+     - Critical features below the minimum threshold
+     - Unexpected zero or near-zero scores for known important features
+     - Inconsistent scores across different data samples
+
+     ### Strengths
+
+     - Captures non-linear relationships between features and target
+     - Scale-invariant measurement of feature relevance
+     - Works for both classification and regression tasks
+     - Provides interpretable scores (0 to 1 scale)
+     - Supports automated feature selection
+     - No assumptions about data distribution
+     - Handles numerical and categorical features
+     - Computationally efficient for most datasets
+
+     ### Limitations
+
+     - Requires sufficient data for reliable estimates
+     - May be computationally intensive for very large datasets
+     - Cannot detect redundant features (pairwise relationships)
+     - Sensitive to feature discretization for continuous variables
+     - Does not account for feature interactions
+     - May underestimate importance of rare but crucial events
+     - Cannot handle missing values directly
+     - May be affected by extreme class imbalance
+     """
+     if task not in ["classification", "regression"]:
+         raise ValueError("task must be either 'classification' or 'regression'")
+
+     X = dataset.x
+     y = dataset.y
+
+     # Select appropriate MI function based on task type
+     if task == "classification":
+         mi_scores = mutual_info_classif(X, y)
+     else:
+         mi_scores = mutual_info_regression(X, y)
+
+     # Create DataFrame for raw data
+     raw_data = RawData(
+         feature=dataset.feature_columns,
+         mutual_information_score=mi_scores.tolist(),
+         pass_fail=["Pass" if score >= min_threshold else "Fail" for score in mi_scores],
+     )
+
+     # Create Plotly figure
+     fig = go.Figure()
+
+     # Sort data for better visualization
+     sorted_indices = sorted(
+         range(len(mi_scores)), key=lambda k: mi_scores[k], reverse=True
+     )
+     sorted_features = [dataset.feature_columns[i] for i in sorted_indices]
+     sorted_scores = [mi_scores[i] for i in sorted_indices]
+
+     # Add bar plot
+     fig.add_trace(
+         go.Bar(
+             x=sorted_features,
+             y=sorted_scores,
+             marker_color=[
+                 "blue" if score >= min_threshold else "red" for score in sorted_scores
+             ],
+             name="Mutual Information Score",
+         )
+     )
+
+     # Add threshold line
+     fig.add_hline(
+         y=min_threshold,
+         line_dash="dash",
+         line_color="gray",
+         annotation_text=f"Threshold ({min_threshold})",
+         annotation_position="right",
+     )
+
+     # Update layout
+     fig.update_layout(
+         title="Mutual Information Scores by Feature",
+         xaxis_title="Features",
+         yaxis_title="Mutual Information Score",
+         xaxis_tickangle=-45,
+         showlegend=False,
+         width=1000,
+         height=600,
+         template="plotly_white",
+     )
+
+     return raw_data, fig
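For orientation, a minimal sketch of how this new test might be invoked once a dataset has been initialized; the input_id and parameter values are illustrative, and the test ID assumes ValidMind's usual validmind.data_validation.<TestName> convention.

import validmind as vm

# Assumes a dataset was registered earlier, e.g.:
# vm_ds = vm.init_dataset(dataset=df, target_column="default", input_id="train_ds")
result = vm.tests.run_test(
    "validmind.data_validation.MutualInformation",
    inputs={"dataset": "train_ds"},  # input_id of the initialized dataset
    params={"min_threshold": 0.02, "task": "classification"},
)
result.log()  # optionally push the result to the ValidMind platform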
validmind/tests/data_validation/ScoreBandDefaultRates.py (new file)
@@ -0,0 +1,139 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import pandas as pd
+ import numpy as np
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tags("visualization", "credit_risk", "scorecard")
+ @tasks("classification")
+ def ScoreBandDefaultRates(
+     dataset: VMDataset,
+     model: VMModel,
+     score_column: str = "score",
+     score_bands: list = None,
+ ):
+     """
+     Analyzes default rates and population distribution across credit score bands.
+
+     ### Purpose
+
+     The Score Band Default Rates test evaluates the discriminatory power of credit scores by analyzing
+     default rates across different score bands. This helps validate score effectiveness, supports
+     policy decisions, and provides insights into portfolio risk distribution.
+
+     ### Test Mechanism
+
+     The test segments the score distribution into bands and calculates key metrics for each band:
+     1. Population count and percentage in each band
+     2. Default rate within each band
+     3. Cumulative statistics across bands
+     The results show how well the scores separate good and bad accounts.
+
+     ### Signs of High Risk
+
+     - Non-monotonic default rates across score bands
+     - Insufficient population in critical score bands
+     - Unexpected default rates for score ranges
+     - High concentration in specific score bands
+     - Similar default rates across adjacent bands
+     - Unstable default rates in key decision bands
+     - Extreme population skewness
+     - Poor risk separation between bands
+
+     ### Strengths
+
+     - Clear view of score effectiveness
+     - Supports policy threshold decisions
+     - Easy to interpret and communicate
+     - Directly links to business decisions
+     - Shows risk segmentation power
+     - Identifies potential score issues
+     - Helps validate scoring model
+     - Supports portfolio monitoring
+
+     ### Limitations
+
+     - Sensitive to band definition choices
+     - May mask within-band variations
+     - Requires sufficient data in each band
+     - Cannot capture non-linear patterns
+     - Point-in-time analysis only
+     - No temporal trend information
+     - Assumes band boundaries are appropriate
+     - May oversimplify risk patterns
+     """
+
+     if score_column not in dataset.df.columns:
+         raise ValueError(
+             f"The required column '{score_column}' is not present in the dataset with input_id {dataset.input_id}"
+         )
+
+     df = dataset._df.copy()
+
+     # Default score bands if none provided
+     if score_bands is None:
+         score_bands = [410, 440, 470]
+
+     # Create band labels
+     band_labels = [
+         f"{score_bands[i]}-{score_bands[i+1]}" for i in range(len(score_bands) - 1)
+     ]
+     band_labels.insert(0, f"<{score_bands[0]}")
+     band_labels.append(f">{score_bands[-1]}")
+
+     # Bin the scores with infinite upper bound
+     df["score_band"] = pd.cut(
+         df[score_column], bins=[-np.inf] + score_bands + [np.inf], labels=band_labels
+     )
+
+     # Calculate min and max scores for the total row
+     min_score = df[score_column].min()
+     max_score = df[score_column].max()
+
+     # Get predicted classes (0/1)
+     y_pred = dataset.y_pred(model)
+
+     # Calculate metrics by band using target_column name
+     results = []
+     for band in band_labels:
+         band_mask = df["score_band"] == band
+         population = band_mask.sum()
+         observed_defaults = df[band_mask][dataset.target_column].sum()
+         predicted_defaults = y_pred[
+             band_mask
+         ].sum()  # Sum of 1s gives number of predicted defaults
+
+         results.append(
+             {
+                 "Score Band": band,
+                 "Population Count": population,
+                 "Population (%)": population / len(df) * 100,
+                 "Predicted Default Rate (%)": (
+                     predicted_defaults / population * 100 if population > 0 else 0
+                 ),
+                 "Observed Default Rate (%)": (
+                     observed_defaults / population * 100 if population > 0 else 0
+                 ),
+             }
+         )
+
+     # Add total row
+     total_population = len(df)
+     total_observed = df[dataset.target_column].sum()
+     total_predicted = y_pred.sum()  # Total number of predicted defaults
+
+     results.append(
+         {
+             "Score Band": f"Total ({min_score:.0f}-{max_score:.0f})",
+             "Population Count": total_population,
+             "Population (%)": sum(r["Population (%)"] for r in results),
+             "Predicted Default Rate (%)": total_predicted / total_population * 100,
+             "Observed Default Rate (%)": total_observed / total_population * 100,
+         }
+     )
+
+     return pd.DataFrame(results)
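A similar hedged sketch for the new ScoreBandDefaultRates test, which needs both a dataset (carrying a score column and assigned predictions) and a model; the input IDs, score column, and bands below are illustrative.

import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.ScoreBandDefaultRates",
    inputs={"dataset": "scored_test_ds", "model": "lr_model"},
    params={"score_column": "score", "score_bands": [410, 440, 470]},
)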
validmind/tests/data_validation/TooManyZeroValues.py
@@ -61,24 +61,25 @@ def TooManyZeroValues(dataset: VMDataset, max_percent_threshold: float = 0.03):
      issues.
      """
      df = dataset.df
-
      table = []

      for col in dataset.feature_columns_numeric:
          value_counts = df[col].value_counts()
+         row_count = df.shape[0]

          if 0 not in value_counts.index:
              continue

          n_zeros = value_counts[0]
-         p_zeros = n_zeros / df.shape[0]
+         p_zeros = (n_zeros / row_count) * 100

          table.append(
              {
-                 "Column": col,
+                 "Variable": col,
+                 "Row Count": row_count,
                  "Number of Zero Values": n_zeros,
-                 "Percentage of Zero Values (%)": p_zeros * 100,
-                 "Pass/Fail": "Pass" if p_zeros < max_percent_threshold else "Fail",
+                 "Percentage of Zero Values (%)": p_zeros,
+                 "Pass/Fail": ("Pass" if p_zeros < (max_percent_threshold) else "Fail"),
              }
          )

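As read from this hunk, p_zeros is now expressed in percent and compared directly against max_percent_threshold, so a caller wanting a 3% cutoff would pass 3 rather than 0.03. A minimal, hypothetical invocation (the input_id is illustrative):

import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.TooManyZeroValues",
    inputs={"dataset": "train_ds"},       # illustrative input_id
    params={"max_percent_threshold": 3},  # 3 percent, per the new comparison
)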
validmind/tests/data_validation/UniqueRows.py
@@ -61,7 +61,9 @@ def UniqueRows(dataset: VMDataset, min_percent_threshold: float = 1):
              "Number of Unique Values": unique_rows[col],
              "Percentage of Unique Values (%)": unique_rows[col] / rows * 100,
              "Pass/Fail": (
-                 "Pass" if unique_rows[col] / rows >= min_percent_threshold else "Fail"
+                 "Pass"
+                 if (unique_rows[col] / rows * 100) >= min_percent_threshold
+                 else "Fail"
              ),
          }
          for col in unique_rows.index
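This change brings the comparison in line with the parameter name: the unique-row ratio is converted to a percentage before being checked against min_percent_threshold. A hypothetical invocation requiring at least 5% unique values per column (input_id illustrative):

import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.UniqueRows",
    inputs={"dataset": "train_ds"},      # illustrative input_id
    params={"min_percent_threshold": 5},
)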
validmind/tests/decorator.py
@@ -24,6 +24,11 @@ def _get_save_func(func, test_id):
      test library.
      """

+     # get og source before its wrapped by the test decorator
+     source = inspect.getsource(func)
+     # remove decorator line
+     source = source.split("\n", 1)[1]
+
      def save(root_folder=".", imports=None):
          parts = test_id.split(".")

@@ -41,35 +46,32 @@

          full_path = os.path.join(path, f"{test_name}.py")

-         source = inspect.getsource(func)
-         # remove decorator line
-         source = source.split("\n", 1)[1]
+         _source = source.replace(f"def {func.__name__}", f"def {test_name}")
+
          if imports:
              imports = "\n".join(imports)
-             source = f"{imports}\n\n\n{source}"
+             _source = f"{imports}\n\n\n{_source}"
+
          # add comment to the top of the file
-         source = f"""
+         _source = f"""
  # Saved from {func.__module__}.{func.__name__}
  # Original Test ID: {test_id}
  # New Test ID: {new_test_id}

- {source}
+ {_source}
  """

-         # ensure that the function name matches the test name
-         source = source.replace(f"def {func.__name__}", f"def {test_name}")
-

          # use black to format the code
          try:
              import black
-             source = black.format_str(source, mode=black.FileMode())
+             _source = black.format_str(_source, mode=black.FileMode())
          except ImportError:
              # ignore if not available
              pass

          with open(full_path, "w") as file:
-             file.writelines(source)
+             file.writelines(_source)

          logger.info(
              f"Saved to {os.path.abspath(full_path)}!"
@@ -119,12 +121,12 @@ def test(func_or_id):
          test_func = load_test(test_id, func, reload=True)
          test_store.register_test(test_id, test_func)

-         @wraps(test_func)
-         def wrapper(*args, **kwargs):
-             return test_func(*args, **kwargs)
-
          # special function to allow the function to be saved to a file
-         wrapper.save = _get_save_func(test_func, test_id)
+         save_func = _get_save_func(func, test_id)
+
+         wrapper = wraps(func)(test_func)
+         wrapper.test_id = test_id
+         wrapper.save = save_func

          return wrapper

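The decorator changes above attach test_id and the save helper (which, per the first hunk, captures the undecorated source and accepts an optional imports list) directly to the registered test function instead of wrapping it in a pass-through function. A minimal sketch of how a custom test defined with @vm.test might use these attributes; the test ID, test body, and output folder are illustrative.

import validmind as vm


@vm.test("my_custom_tests.ColumnMeans")
def ColumnMeans(dataset):
    """Returns the mean of each numeric feature column."""
    return dataset.df[dataset.feature_columns_numeric].mean().to_frame("Mean")


print(ColumnMeans.test_id)  # "my_custom_tests.ColumnMeans", attached by the decorator

# Writes the test's source under ./my_tests/ (the exact layout follows the test ID parts)
ColumnMeans.save("my_tests")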
validmind/tests/model_validation/sklearn/CalibrationCurve.py (new file)
@@ -0,0 +1,116 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.calibration import calibration_curve
+ import plotly.graph_objects as go
+ from validmind import tags, tasks
+ from validmind.vm_models import VMModel, VMDataset
+ from validmind.vm_models.result import RawData
+
+
+ @tags("sklearn", "model_performance", "classification")
+ @tasks("classification")
+ def CalibrationCurve(model: VMModel, dataset: VMDataset, n_bins: int = 10):
+     """
+     Evaluates the calibration of probability estimates by comparing predicted probabilities against observed
+     frequencies.
+
+     ### Purpose
+
+     The Calibration Curve test assesses how well a model's predicted probabilities align with actual
+     observed frequencies. This is crucial for applications requiring accurate probability estimates,
+     such as risk assessment, decision-making systems, and cost-sensitive applications where probability
+     calibration directly impacts business decisions.
+
+     ### Test Mechanism
+
+     The test uses sklearn's calibration_curve function to:
+     1. Sort predictions into bins based on predicted probabilities
+     2. Calculate the mean predicted probability in each bin
+     3. Compare against the observed frequency of positive cases
+     4. Plot the results against the perfect calibration line (y=x)
+     The resulting curve shows how well the predicted probabilities match empirical probabilities.
+
+     ### Signs of High Risk
+
+     - Significant deviation from the perfect calibration line
+     - Systematic overconfidence (predictions too close to 0 or 1)
+     - Systematic underconfidence (predictions clustered around 0.5)
+     - Empty or sparse bins indicating poor probability coverage
+     - Sharp discontinuities in the calibration curve
+     - Different calibration patterns across different probability ranges
+     - Consistent over/under estimation in critical probability regions
+     - Large confidence intervals in certain probability ranges
+
+     ### Strengths
+
+     - Visual and intuitive interpretation of probability quality
+     - Identifies systematic biases in probability estimates
+     - Supports probability threshold selection
+     - Helps understand model confidence patterns
+     - Applicable across different classification models
+     - Enables comparison between different models
+     - Guides potential need for recalibration
+     - Critical for risk-sensitive applications
+
+     ### Limitations
+
+     - Sensitive to the number of bins chosen
+     - Requires sufficient samples in each bin for reliable estimates
+     - May mask local calibration issues within bins
+     - Does not account for feature-dependent calibration issues
+     - Limited to binary classification problems
+     - Cannot detect all forms of miscalibration
+     - Assumes bin boundaries are appropriate for the problem
+     - May be affected by class imbalance
+     """
+     prob_true, prob_pred = calibration_curve(
+         dataset.y, dataset.y_prob(model), n_bins=n_bins
+     )
+
+     # Create DataFrame for raw data
+     raw_data = RawData(
+         mean_predicted_probability=prob_pred, observed_frequency=prob_true
+     )
+
+     # Create Plotly figure
+     fig = go.Figure()
+
+     # Add perfect calibration line
+     fig.add_trace(
+         go.Scatter(
+             x=[0, 1],
+             y=[0, 1],
+             mode="lines",
+             name="Perfect Calibration",
+             line=dict(dash="dash", color="gray"),
+         )
+     )
+
+     # Add calibration curve
+     fig.add_trace(
+         go.Scatter(
+             x=prob_pred,
+             y=prob_true,
+             mode="lines+markers",
+             name="Model Calibration",
+             line=dict(color="blue"),
+             marker=dict(size=8),
+         )
+     )
+
+     # Update layout
+     fig.update_layout(
+         title="Calibration Curve",
+         xaxis_title="Mean Predicted Probability",
+         yaxis_title="Observed Frequency",
+         xaxis=dict(range=[0, 1]),
+         yaxis=dict(range=[0, 1]),
+         width=800,
+         height=600,
+         showlegend=True,
+         template="plotly_white",
+     )
+
+     return raw_data, fig
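Finally, a hedged usage sketch for the new CalibrationCurve test; it assumes a binary classifier and a dataset with probability predictions already assigned (for example via assign_predictions), and the input IDs are illustrative.

import validmind as vm

result = vm.tests.run_test(
    "validmind.model_validation.sklearn.CalibrationCurve",
    inputs={"model": "xgb_model", "dataset": "test_ds"},
    params={"n_bins": 10},
)
result.log()  # optionally push the result to the ValidMind platform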