validmind 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +20 -4
- validmind/ai/test_result_description/user.jinja +5 -0
- validmind/datasets/credit_risk/lending_club.py +444 -14
- validmind/tests/data_validation/MutualInformation.py +129 -0
- validmind/tests/data_validation/ScoreBandDefaultRates.py +139 -0
- validmind/tests/data_validation/TooManyZeroValues.py +6 -5
- validmind/tests/data_validation/UniqueRows.py +3 -1
- validmind/tests/decorator.py +18 -16
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +116 -0
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +261 -0
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +1 -0
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +144 -56
- validmind/tests/model_validation/sklearn/ModelParameters.py +74 -0
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +130 -0
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -6
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -3
- validmind/tests/run.py +43 -72
- validmind/utils.py +23 -7
- validmind/vm_models/result/result.py +18 -17
- {validmind-2.7.2.dist-info → validmind-2.7.4.dist-info}/METADATA +1 -1
- {validmind-2.7.2.dist-info → validmind-2.7.4.dist-info}/RECORD +25 -19
- {validmind-2.7.2.dist-info → validmind-2.7.4.dist-info}/LICENSE +0 -0
- {validmind-2.7.2.dist-info → validmind-2.7.4.dist-info}/WHEEL +0 -0
- {validmind-2.7.2.dist-info → validmind-2.7.4.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -2,73 +2,161 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import Union
-
+from typing import Union, Dict, List
 from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import make_scorer, recall_score
 
 from validmind import tags, tasks
-from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("sklearn", "model_performance")
 @tasks("classification", "clustering")
+def custom_recall(y_true, y_pred_proba, threshold=0.5):
+    y_pred = (y_pred_proba >= threshold).astype(int)
+    return recall_score(y_true, y_pred)
+
+
+def _get_metrics(scoring):
+    """Convert scoring parameter to list of metrics."""
+    if scoring is None:
+        return ["accuracy"]
+    return (
+        scoring
+        if isinstance(scoring, list)
+        else list(scoring.keys()) if isinstance(scoring, dict) else [scoring]
+    )
+
+
+def _get_thresholds(thresholds):
+    """Convert thresholds parameter to list."""
+    if thresholds is None:
+        return [0.5]
+    return [thresholds] if isinstance(thresholds, (int, float)) else thresholds
+
+
+def _create_scoring_dict(scoring, metrics, threshold):
+    """Create scoring dictionary for GridSearchCV."""
+    if scoring is None:
+        return None
+
+    scoring_dict = {}
+    for metric in metrics:
+        if metric == "recall":
+            scoring_dict[metric] = make_scorer(
+                custom_recall, needs_proba=True, threshold=threshold
+            )
+        elif metric == "roc_auc":
+            scoring_dict[metric] = "roc_auc"
+        else:
+            scoring_dict[metric] = metric
+    return scoring_dict
+
+
+@tags("sklearn", "model_performance")
+@tasks("clustering", "classification")
 def HyperParametersTuning(
     model: VMModel,
     dataset: VMDataset,
-    param_grid:
-    scoring: Union[str,
+    param_grid: dict,
+    scoring: Union[str, List, Dict] = None,
+    thresholds: Union[float, List[float]] = None,
+    fit_params: dict = None,
 ):
     """
-    [39 lines of the previous docstring (old lines 23-61), not captured in this diff view]
+    Performs exhaustive grid search over specified parameter ranges to find optimal model configurations
+    across different metrics and decision thresholds.
+
+    ### Purpose
+
+    The Hyperparameter Tuning test systematically explores the model's parameter space to identify optimal
+    configurations. It supports multiple optimization metrics and decision thresholds, providing a comprehensive
+    view of how different parameter combinations affect various aspects of model performance.
+
+    ### Test Mechanism
+
+    The test uses scikit-learn's GridSearchCV to perform cross-validation for each parameter combination.
+    For each specified threshold and optimization metric, it creates a scoring dictionary with
+    threshold-adjusted metrics, performs grid search with cross-validation, records best parameters and
+    corresponding scores, and combines results into a comparative table. This process is repeated for each
+    optimization metric to provide a comprehensive view of model performance under different configurations.
+
+    ### Signs of High Risk
+
+    - Large performance variations across different parameter combinations
+    - Significant discrepancies between different optimization metrics
+    - Best parameters at the edges of the parameter grid
+    - Unstable performance across different thresholds
+    - Overly complex model configurations (risk of overfitting)
+    - Very different optimal parameters for different metrics
+    - Cross-validation scores showing high variance
+    - Extreme parameter values in best configurations
+
+    ### Strengths
+
+    - Comprehensive exploration of parameter space
+    - Supports multiple optimization metrics
+    - Allows threshold optimization
+    - Provides comparative view across different configurations
+    - Uses cross-validation for robust evaluation
+    - Helps understand trade-offs between different metrics
+    - Enables systematic parameter selection
+    - Supports both classification and clustering tasks
+
+    ### Limitations
+
+    - Computationally expensive for large parameter grids
+    - May not find global optimum (limited to grid points)
+    - Cannot handle dependencies between parameters
+    - Memory intensive for large datasets
+    - Limited to scikit-learn compatible models
+    - Cross-validation splits may not preserve time series structure
+    - Grid search may miss optimal values between grid points
+    - Resource intensive for high-dimensional parameter spaces
     """
-    [12 lines of the previous implementation (old lines 63-74), not captured in this diff view]
+    fit_params = fit_params or {}
+
+    # Simple case: no scoring and no thresholds
+    if scoring is None and thresholds is None:
+        estimators = GridSearchCV(model.model, param_grid=param_grid, scoring=None)
+        estimators.fit(dataset.x_df(), dataset.y, **fit_params)
+        return [
+            {
+                "Best Model": estimators.best_estimator_,
+                "Best Parameters": estimators.best_params_,
+            }
+        ]
+
+    # Complex case: with scoring or thresholds
+    results = []
+    metrics = _get_metrics(scoring)
+    thresholds = _get_thresholds(thresholds)
+
+    for threshold in thresholds:
+        scoring_dict = _create_scoring_dict(scoring, metrics, threshold)
+
+        for optimize_for in metrics:
+            estimators = GridSearchCV(
+                model.model,
+                param_grid=param_grid,
+                scoring=scoring_dict,
+                refit=optimize_for if scoring is not None else True,
+            )
+
+            estimators.fit(dataset.x_df(), dataset.y, **fit_params)
+
+            best_index = estimators.best_index_
+            row_result = {
+                "Optimized for": optimize_for,
+                "Threshold": threshold,
+                "Best Parameters": estimators.best_params_,
+            }
+
+            score_key = (
+                "mean_test_score" if scoring is None else f"mean_test_{optimize_for}"
+            )
+            row_result[optimize_for] = estimators.cv_results_[score_key][best_index]
+
+            results.append(row_result)
+
+    return results
validmind/tests/model_validation/sklearn/ModelParameters.py (new file)

@@ -0,0 +1,74 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from validmind import tags, tasks
+
+
+@tags("model_training", "metadata")
+@tasks("classification", "regression")
+def ModelParameters(model, model_params=None):
+    """
+    Extracts and displays model parameters in a structured format for transparency and reproducibility.
+
+    ### Purpose
+
+    The Model Parameters test is designed to provide transparency into model configuration and ensure
+    reproducibility of machine learning models. It accomplishes this by extracting and presenting all
+    relevant parameters that define the model's behavior, making it easier to audit, validate, and
+    reproduce model training.
+
+    ### Test Mechanism
+
+    The test leverages scikit-learn's API convention of get_params() to extract model parameters. It
+    produces a structured DataFrame containing parameter names and their corresponding values. For models
+    that follow scikit-learn's API (including XGBoost, RandomForest, and other estimators), all
+    parameters are automatically extracted and displayed.
+
+    ### Signs of High Risk
+
+    - Missing crucial parameters that should be explicitly set
+    - Extreme parameter values that could indicate overfitting (e.g., unlimited tree depth)
+    - Inconsistent parameters across different versions of the same model type
+    - Parameter combinations known to cause instability or poor performance
+    - Default values used for critical parameters that should be tuned
+
+    ### Strengths
+
+    - Universal compatibility with scikit-learn API-compliant models
+    - Ensures transparency in model configuration
+    - Facilitates model reproducibility and version control
+    - Enables systematic parameter auditing
+    - Supports both classification and regression models
+    - Helps identify potential configuration issues
+
+    ### Limitations
+
+    - Only works with models implementing scikit-learn's get_params() method
+    - Cannot capture dynamic parameters set during model training
+    - Does not validate parameter values for model-specific appropriateness
+    - Parameter meanings and impacts may vary across different model types
+    - Cannot detect indirect parameter interactions or their effects on model performance
+    """
+    # Check if model implements get_params()
+    if not hasattr(model.model, "get_params"):
+        return pd.DataFrame()
+
+    # Get all model parameters
+    params = model.model.get_params()
+
+    # If model_params is None, use all parameters from get_params()
+    if model_params is None:
+        model_params = sorted(params.keys())  # Sort for consistent ordering
+
+    # Create DataFrame with parameters and their values
+    param_df = pd.DataFrame(
+        [
+            {"Parameter": param, "Value": str(params.get(param, "Not specified"))}
+            for param in model_params
+            if params.get(param) is not None
+        ]
+    )
+
+    return param_df
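Because `ModelParameters` relies only on the scikit-learn `get_params()` convention, invoking it is straightforward. A rough sketch, where the `vm_model` placeholder and the `model_params` subset are assumptions for illustration:

```python
import validmind as vm

# Sketch: vm_model is a placeholder for a model wrapped with vm.init_model().
# With model_params omitted, every parameter returned by get_params() is listed;
# the explicit subset below is only an example.
vm.tests.run_test(
    "validmind.model_validation.sklearn.ModelParameters",
    inputs={"model": vm_model},
    params={"model_params": ["n_estimators", "max_depth", "learning_rate"]},
)
```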
validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py (new file)

@@ -0,0 +1,130 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+import plotly.graph_objects as go
+from validmind import tags, tasks
+from validmind.vm_models import VMModel, VMDataset
+
+
+@tags("visualization", "credit_risk", "calibration")
+@tasks("classification")
+def ScoreProbabilityAlignment(
+    model: VMModel, dataset: VMDataset, score_column: str = "score", n_bins: int = 10
+):
+    """
+    Analyzes the alignment between credit scores and predicted probabilities.
+
+    ### Purpose
+
+    The Score-Probability Alignment test evaluates how well credit scores align with
+    predicted default probabilities. This helps validate score scaling, identify potential
+    calibration issues, and ensure scores reflect risk appropriately.
+
+    ### Test Mechanism
+
+    The test:
+    1. Groups scores into bins
+    2. Calculates average predicted probability per bin
+    3. Tests monotonicity of relationship
+    4. Analyzes probability distribution within score bands
+
+    ### Signs of High Risk
+
+    - Non-monotonic relationship between scores and probabilities
+    - Large probability variations within score bands
+    - Unexpected probability jumps between adjacent bands
+    - Poor alignment with expected odds-to-score relationship
+    - Inconsistent probability patterns across score ranges
+    - Clustering of probabilities at extreme values
+    - Score bands with similar probability profiles
+    - Unstable probability estimates in key decision bands
+
+    ### Strengths
+
+    - Direct validation of score-to-probability relationship
+    - Identifies potential calibration issues
+    - Supports score band validation
+    - Helps understand model behavior
+    - Useful for policy setting
+    - Visual and numerical results
+    - Easy to interpret
+    - Supports regulatory documentation
+
+    ### Limitations
+
+    - Sensitive to bin selection
+    - Requires sufficient data per bin
+    - May mask within-bin variations
+    - Point-in-time analysis only
+    - Cannot detect all forms of miscalibration
+    - Assumes scores should align with probabilities
+    - May oversimplify complex relationships
+    - Limited to binary outcomes
+    """
+    if score_column not in dataset.df.columns:
+        raise ValueError(f"Score column '{score_column}' not found in dataset")
+
+    # Get predicted probabilities
+    y_prob = dataset.y_prob(model)
+
+    # Create score bins
+    df = dataset.df.copy()
+    df["probability"] = y_prob
+
+    # Create score bins with equal width
+    df["score_bin"] = pd.qcut(df[score_column], n_bins, duplicates="drop")
+
+    # Calculate statistics per bin
+    results = []
+    for bin_name, group in df.groupby("score_bin"):
+        bin_stats = {
+            "Score Range": f"{bin_name.left:.0f}-{bin_name.right:.0f}",
+            "Mean Score": group[score_column].mean(),
+            "Population Count": len(group),
+            "Population (%)": len(group) / len(df) * 100,
+            "Mean Probability (%)": group["probability"].mean() * 100,
+            "Min Probability (%)": group["probability"].min() * 100,
+            "Max Probability (%)": group["probability"].max() * 100,
+            "Probability Std": group["probability"].std() * 100,
+        }
+        results.append(bin_stats)
+
+    results_df = pd.DataFrame(results)
+
+    # Create visualization
+    fig = go.Figure()
+
+    # Add probability range
+    fig.add_trace(
+        go.Scatter(
+            x=results_df["Mean Score"],
+            y=results_df["Mean Probability (%)"],
+            mode="lines+markers",
+            name="Mean Probability",
+            line=dict(color="blue"),
+            error_y=dict(
+                type="data",
+                symmetric=False,
+                array=results_df["Max Probability (%)"]
+                - results_df["Mean Probability (%)"],
+                arrayminus=results_df["Mean Probability (%)"]
+                - results_df["Min Probability (%)"],
+                color="gray",
+            ),
+        )
+    )
+
+    # Update layout
+    fig.update_layout(
+        title="Score-Probability Alignment",
+        xaxis_title="Score",
+        yaxis_title="Default Probability (%)",
+        showlegend=True,
+        template="plotly_white",
+        width=800,
+        height=600,
+    )
+
+    return results_df, fig
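A possible way to exercise the new test from a notebook, assuming a scored dataset with assigned predictions (all identifiers below are illustrative placeholders, not part of the release):

```python
import validmind as vm

# Sketch: assumes vm_dataset (from vm.init_dataset()) contains a "score" column
# and that predictions were attached via vm_dataset.assign_predictions(vm_model).
vm.tests.run_test(
    "validmind.model_validation.sklearn.ScoreProbabilityAlignment",
    inputs={"model": vm_model, "dataset": vm_dataset},
    params={"score_column": "score", "n_bins": 10},
)
```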
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py

@@ -9,22 +9,21 @@ from matplotlib import cm
 from validmind import tags, tasks
 
 
-@tags("visualization", "credit_risk"
+@tags("visualization", "credit_risk")
 @tasks("classification")
 def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabilities"):
     """
-    Visualizes cumulative probabilities of positive and negative classes for both training and testing in
-    regression models.
+    Visualizes cumulative probabilities of positive and negative classes for both training and testing in classification models.
 
     ### Purpose
 
     This metric is utilized to evaluate the distribution of predicted probabilities for positive and negative classes
-    in a
+    in a classification model. It provides a visual assessment of the model's behavior by plotting the cumulative
     probabilities for positive and negative classes across both the training and test datasets.
 
     ### Test Mechanism
 
-    The
+    The classification model is evaluated by first computing the predicted probabilities for each instance in both
     the training and test datasets, which are then added as a new column in these sets. The cumulative probabilities
     for positive and negative classes are subsequently calculated and sorted in ascending order. Cumulative
     distributions of these probabilities are created for both positive and negative classes across both training and
@@ -51,7 +50,7 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil
 
     ### Limitations
 
-    - Exclusive to classification tasks and specifically to
+    - Exclusive to classification tasks and specifically to classification models.
     - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
     detection.
     - The method does not give a solitary quantifiable measure of model risk, instead, it offers a visual
validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py

@@ -9,7 +9,7 @@ from matplotlib import cm
 from validmind import tags, tasks
 
 
-@tags("visualization", "credit_risk"
+@tags("visualization", "credit_risk")
 @tasks("classification")
 def PredictionProbabilitiesHistogram(
     dataset, model, title="Histogram of Predictive Probabilities"
@@ -22,7 +22,7 @@ def PredictionProbabilitiesHistogram(
 
     The Prediction Probabilities Histogram test is designed to generate histograms displaying the Probability of
     Default (PD) predictions for both positive and negative classes in training and testing datasets. This helps in
-    evaluating the performance of a
+    evaluating the performance of a classification model.
 
     ### Test Mechanism
 
@@ -52,7 +52,6 @@ def PredictionProbabilitiesHistogram(
     ### Limitations
 
     - Specifically tailored for binary classification scenarios and not suited for multi-class classification tasks.
-    - Mainly applicable to logistic regression models, and may not be effective for other model types.
     - Provides a robust visual representation but lacks a quantifiable measure to assess model performance.
     """
 