validmind 2.8.10__py3-none-any.whl → 2.8.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -5
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +17 -11
- validmind/ai/utils.py +2 -2
- validmind/api_client.py +75 -32
- validmind/client.py +108 -100
- validmind/client_config.py +3 -3
- validmind/datasets/classification/__init__.py +7 -3
- validmind/datasets/credit_risk/lending_club.py +28 -16
- validmind/datasets/nlp/cnn_dailymail.py +10 -4
- validmind/datasets/regression/__init__.py +22 -5
- validmind/errors.py +17 -7
- validmind/input_registry.py +1 -1
- validmind/logging.py +44 -35
- validmind/models/foundation.py +2 -2
- validmind/models/function.py +10 -3
- validmind/template.py +30 -22
- validmind/test_suites/__init__.py +2 -2
- validmind/tests/_store.py +13 -4
- validmind/tests/comparison.py +65 -33
- validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
- validmind/tests/data_validation/AutoMA.py +1 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
- validmind/tests/data_validation/BoxPierce.py +3 -1
- validmind/tests/data_validation/ClassImbalance.py +4 -2
- validmind/tests/data_validation/DatasetDescription.py +3 -24
- validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
- validmind/tests/data_validation/HighCardinality.py +5 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
- validmind/tests/data_validation/IQROutliersTable.py +5 -2
- validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
- validmind/tests/data_validation/JarqueBera.py +2 -2
- validmind/tests/data_validation/LJungBox.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/MissingValues.py +14 -10
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +2 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
- validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
- validmind/tests/data_validation/RollingStatsPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
- validmind/tests/data_validation/SeasonalDecompose.py +1 -1
- validmind/tests/data_validation/ShapiroWilk.py +2 -2
- validmind/tests/data_validation/Skewness.py +7 -6
- validmind/tests/data_validation/SpreadPlot.py +1 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/TextDescription.py +1 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/decorator.py +14 -11
- validmind/tests/load.py +38 -24
- validmind/tests/model_validation/BertScore.py +7 -1
- validmind/tests/model_validation/BleuScore.py +7 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
- validmind/tests/model_validation/ContextualRecall.py +9 -1
- validmind/tests/model_validation/FeaturesAUC.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +7 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
- validmind/tests/model_validation/RegardScore.py +6 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
- validmind/tests/model_validation/RougeScore.py +3 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
- validmind/tests/model_validation/TokenDisparity.py +5 -1
- validmind/tests/model_validation/ToxicityScore.py +2 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -3
- validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -3
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -3
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -3
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -3
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -3
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +5 -3
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -3
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +28 -5
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
- validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
- validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +34 -26
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
- validmind/tests/output.py +66 -11
- validmind/tests/prompt_validation/Clarity.py +1 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
- validmind/tests/prompt_validation/Robustness.py +6 -1
- validmind/tests/prompt_validation/Specificity.py +1 -1
- validmind/tests/run.py +28 -14
- validmind/tests/test_providers.py +28 -35
- validmind/tests/utils.py +17 -4
- validmind/unit_metrics/__init__.py +1 -1
- validmind/utils.py +295 -31
- validmind/vm_models/dataset/dataset.py +19 -16
- validmind/vm_models/dataset/utils.py +5 -3
- validmind/vm_models/figure.py +6 -6
- validmind/vm_models/input.py +6 -5
- validmind/vm_models/model.py +5 -5
- validmind/vm_models/result/result.py +122 -43
- validmind/vm_models/result/utils.py +9 -28
- validmind/vm_models/test_suite/__init__.py +5 -0
- validmind/vm_models/test_suite/runner.py +5 -5
- validmind/vm_models/test_suite/summary.py +20 -2
- validmind/vm_models/test_suite/test.py +6 -6
- validmind/vm_models/test_suite/test_suite.py +10 -10
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/METADATA +4 -5
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/RECORD +189 -188
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
from sklearn.metrics import roc_auc_score
|
7
7
|
from sklearn.preprocessing import LabelBinarizer
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
from validmind.vm_models import VMDataset, VMModel
|
11
11
|
|
12
12
|
|
@@ -62,19 +62,34 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
|
|
62
62
|
lb = LabelBinarizer()
|
63
63
|
lb.fit(y_true)
|
64
64
|
|
65
|
+
y_true_binary = lb.transform(y_true)
|
66
|
+
y_score_binary = lb.transform(dataset.y_pred(model))
|
67
|
+
|
65
68
|
roc_auc = roc_auc_score(
|
66
|
-
y_true=
|
67
|
-
y_score=
|
69
|
+
y_true=y_true_binary,
|
70
|
+
y_score=y_score_binary,
|
68
71
|
average="macro",
|
69
72
|
)
|
70
73
|
|
71
74
|
else:
|
72
|
-
|
75
|
+
y_score_prob = dataset.y_prob(model)
|
76
|
+
roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
|
73
77
|
|
74
|
-
|
78
|
+
results = [
|
75
79
|
{
|
76
80
|
"Score": roc_auc,
|
77
81
|
"Threshold": min_threshold,
|
78
82
|
"Pass/Fail": "Pass" if roc_auc > min_threshold else "Fail",
|
79
83
|
}
|
80
|
-
]
|
84
|
+
]
|
85
|
+
|
86
|
+
return (
|
87
|
+
results,
|
88
|
+
roc_auc > min_threshold,
|
89
|
+
RawData(
|
90
|
+
y_true=y_true,
|
91
|
+
roc_auc=roc_auc,
|
92
|
+
model=model.input_id,
|
93
|
+
dataset=dataset.input_id,
|
94
|
+
),
|
95
|
+
)
|
@@ -10,7 +10,7 @@ import pandas as pd
|
|
10
10
|
import seaborn as sns
|
11
11
|
from sklearn import metrics
|
12
12
|
|
13
|
-
from validmind import tags, tasks
|
13
|
+
from validmind import RawData, tags, tasks
|
14
14
|
from validmind.logging import get_logger
|
15
15
|
from validmind.vm_models import VMDataset, VMModel
|
16
16
|
|
@@ -73,6 +73,7 @@ def _prepare_results(
|
|
73
73
|
columns={"shape": "training records", f"{metric}": f"training {metric}"},
|
74
74
|
inplace=True,
|
75
75
|
)
|
76
|
+
results["test records"] = results_test["shape"]
|
76
77
|
results[f"test {metric}"] = results_test[metric]
|
77
78
|
|
78
79
|
# Adjust gap calculation based on metric directionality
|
@@ -292,11 +293,18 @@ def OverfitDiagnosis(
|
|
292
293
|
{
|
293
294
|
"Feature": feature_column,
|
294
295
|
"Slice": row["slice"],
|
295
|
-
"Number of Records": row["training records"],
|
296
|
+
"Number of Training Records": row["training records"],
|
297
|
+
"Number of Test Records": row["test records"],
|
296
298
|
f"Training {metric.upper()}": row[f"training {metric}"],
|
297
299
|
f"Test {metric.upper()}": row[f"test {metric}"],
|
298
300
|
"Gap": row["gap"],
|
299
301
|
}
|
300
302
|
)
|
301
303
|
|
302
|
-
return (
|
304
|
+
return (
|
305
|
+
{"Overfit Diagnosis": test_results},
|
306
|
+
*figures,
|
307
|
+
RawData(
|
308
|
+
model=model.input_id, datasets=[dataset.input_id for dataset in datasets]
|
309
|
+
),
|
310
|
+
)
|
@@ -83,4 +83,9 @@ def PrecisionRecallCurve(model: VMModel, dataset: VMDataset):
|
|
83
83
|
),
|
84
84
|
)
|
85
85
|
|
86
|
-
return fig, RawData(
|
86
|
+
return fig, RawData(
|
87
|
+
precision=precision,
|
88
|
+
recall=recall,
|
89
|
+
model=model.input_id,
|
90
|
+
dataset=dataset.input_id,
|
91
|
+
)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
from sklearn import metrics
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("sklearn", "model_performance")
|
@@ -62,7 +62,11 @@ def RegressionErrors(model, dataset):
|
|
62
62
|
y_pred = dataset.y_pred(model)
|
63
63
|
y_true = y_true.astype(y_pred.dtype)
|
64
64
|
|
65
|
-
|
65
|
+
results_df = _regression_errors(y_true, y_pred)
|
66
|
+
|
67
|
+
return results_df, RawData(
|
68
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
69
|
+
)
|
66
70
|
|
67
71
|
|
68
72
|
def _regression_errors(y_true, y_pred):
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import numpy as np
|
6
6
|
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.logging import get_logger
|
10
10
|
from validmind.vm_models import VMDataset, VMModel
|
11
11
|
|
@@ -74,10 +74,15 @@ def RegressionPerformance(model: VMModel, dataset: VMDataset):
|
|
74
74
|
# MBD calculation
|
75
75
|
metrics["Mean Bias Deviation (MBD)"] = np.mean(y_pred - y_true)
|
76
76
|
|
77
|
-
return
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
77
|
+
return (
|
78
|
+
[
|
79
|
+
{
|
80
|
+
"Metric": metric,
|
81
|
+
"Value": value,
|
82
|
+
}
|
83
|
+
for metric, value in metrics.items()
|
84
|
+
],
|
85
|
+
RawData(
|
86
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
87
|
+
),
|
88
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from sklearn import metrics
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
|
10
10
|
|
11
11
|
|
@@ -55,11 +55,14 @@ def RegressionR2Square(dataset, model):
|
|
55
55
|
y_pred = dataset.y_pred(model)
|
56
56
|
y_true = y_true.astype(y_pred.dtype)
|
57
57
|
|
58
|
+
r2 = metrics.r2_score(y_true, y_pred)
|
59
|
+
adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
|
60
|
+
|
58
61
|
return pd.DataFrame(
|
59
62
|
{
|
60
|
-
"R-squared (R2) Score": [
|
61
|
-
"Adjusted R-squared (R2) Score": [
|
62
|
-
adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
|
63
|
-
],
|
63
|
+
"R-squared (R2) Score": [r2],
|
64
|
+
"Adjusted R-squared (R2) Score": [adj_r2],
|
64
65
|
}
|
66
|
+
), RawData(
|
67
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
65
68
|
)
|
@@ -3,10 +3,12 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import warnings
|
6
|
+
from typing import Dict, List, Optional, Union
|
6
7
|
from warnings import filters as _warnings_filters
|
7
8
|
|
8
9
|
import matplotlib.pyplot as plt
|
9
10
|
import numpy as np
|
11
|
+
import pandas as pd
|
10
12
|
import shap
|
11
13
|
|
12
14
|
from validmind import RawData, tags, tasks
|
@@ -18,7 +20,10 @@ from validmind.vm_models import VMDataset, VMModel
|
|
18
20
|
logger = get_logger(__name__)
|
19
21
|
|
20
22
|
|
21
|
-
def select_shap_values(
|
23
|
+
def select_shap_values(
|
24
|
+
shap_values: Union[np.ndarray, List[np.ndarray]],
|
25
|
+
class_of_interest: Optional[int] = None,
|
26
|
+
) -> np.ndarray:
|
22
27
|
"""Selects SHAP values for binary or multiclass classification.
|
23
28
|
|
24
29
|
For regression models, returns the SHAP values directly as there are no classes.
|
@@ -41,32 +46,30 @@ def select_shap_values(shap_values, class_of_interest):
|
|
41
46
|
"""
|
42
47
|
if not isinstance(shap_values, list):
|
43
48
|
# For regression, return the SHAP values as they are
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
49
|
+
selected_values = shap_values
|
50
|
+
else:
|
51
|
+
num_classes = len(shap_values)
|
52
|
+
# Default to class 1 for binary classification where no class is specified
|
53
|
+
if num_classes == 2 and class_of_interest is None:
|
54
|
+
selected_values = shap_values[1]
|
55
|
+
# Otherwise, use the specified class_of_interest
|
56
|
+
elif class_of_interest is not None and 0 <= class_of_interest < num_classes:
|
57
|
+
selected_values = shap_values[class_of_interest]
|
58
|
+
else:
|
59
|
+
raise ValueError(
|
60
|
+
f"Invalid class_of_interest: {class_of_interest}. Must be between 0 and {num_classes - 1}."
|
61
|
+
)
|
54
62
|
|
55
|
-
#
|
56
|
-
if (
|
57
|
-
|
58
|
-
or class_of_interest < 0
|
59
|
-
or class_of_interest >= num_classes
|
60
|
-
):
|
61
|
-
raise ValueError(
|
62
|
-
f"Invalid class_of_interest: {class_of_interest}. Must be between 0 and {num_classes - 1}."
|
63
|
-
)
|
63
|
+
# Add type conversion here to ensure proper float array
|
64
|
+
if hasattr(selected_values, "dtype"):
|
65
|
+
selected_values = np.array(selected_values, dtype=np.float64)
|
64
66
|
|
65
|
-
|
66
|
-
return shap_values[class_of_interest]
|
67
|
+
return selected_values
|
67
68
|
|
68
69
|
|
69
|
-
def generate_shap_plot(
|
70
|
+
def generate_shap_plot(
|
71
|
+
type_: str, shap_values: np.ndarray, x_test: Union[np.ndarray, pd.DataFrame]
|
72
|
+
) -> plt.Figure:
|
70
73
|
"""Plots two types of SHAP global importance (SHAP).
|
71
74
|
|
72
75
|
Args:
|
@@ -117,8 +120,8 @@ def SHAPGlobalImportance(
|
|
117
120
|
dataset: VMDataset,
|
118
121
|
kernel_explainer_samples: int = 10,
|
119
122
|
tree_or_linear_explainer_samples: int = 200,
|
120
|
-
class_of_interest: int = None,
|
121
|
-
):
|
123
|
+
class_of_interest: Optional[int] = None,
|
124
|
+
) -> Dict[str, Union[plt.Figure, Dict[str, float]]]:
|
122
125
|
"""
|
123
126
|
Evaluates and visualizes global feature importance using SHAP values for model explanation and risk identification.
|
124
127
|
|
@@ -229,5 +232,10 @@ def SHAPGlobalImportance(
|
|
229
232
|
return (
|
230
233
|
generate_shap_plot("mean", shap_values, shap_sample),
|
231
234
|
generate_shap_plot("summary", shap_values, shap_sample),
|
232
|
-
RawData(
|
235
|
+
RawData(
|
236
|
+
shap_values=shap_values,
|
237
|
+
shap_sample=shap_sample,
|
238
|
+
model=model.input_id,
|
239
|
+
dataset=dataset.input_id,
|
240
|
+
),
|
233
241
|
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
|
@@ -128,4 +128,12 @@ def ScoreProbabilityAlignment(
|
|
128
128
|
height=600,
|
129
129
|
)
|
130
130
|
|
131
|
-
|
131
|
+
# Include raw data for post-processing
|
132
|
+
raw_data = RawData(
|
133
|
+
score_bins=df[["score_bin", score_column]],
|
134
|
+
predicted_probabilities=df["probability"],
|
135
|
+
model=model.input_id,
|
136
|
+
dataset=dataset.input_id,
|
137
|
+
)
|
138
|
+
|
139
|
+
return results_df, fig, raw_data
|
@@ -110,5 +110,9 @@ def SilhouettePlot(model: VMModel, dataset: VMDataset):
|
|
110
110
|
"Silhouette Score": silhouette_avg,
|
111
111
|
},
|
112
112
|
fig,
|
113
|
-
RawData(
|
113
|
+
RawData(
|
114
|
+
sample_silhouette_values=sample_silhouette_values,
|
115
|
+
model=model.input_id,
|
116
|
+
dataset=dataset.input_id,
|
117
|
+
),
|
114
118
|
)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from sklearn import metrics
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.vm_models import VMDataset, VMModel
|
9
9
|
|
10
10
|
|
@@ -48,11 +48,14 @@ def VMeasure(dataset: VMDataset, model: VMModel):
|
|
48
48
|
the other. The V Measure Score does not provide flexibility in assigning different weights to homogeneity and
|
49
49
|
completeness.
|
50
50
|
"""
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
}
|
58
|
-
|
51
|
+
v_measure = metrics.v_measure_score(
|
52
|
+
labels_true=dataset.y,
|
53
|
+
labels_pred=dataset.y_pred(model),
|
54
|
+
)
|
55
|
+
|
56
|
+
return (
|
57
|
+
[{"V Measure": v_measure}],
|
58
|
+
RawData(
|
59
|
+
v_measure_score=v_measure, model=model.input_id, dataset=dataset.input_id
|
60
|
+
),
|
61
|
+
)
|
@@ -47,7 +47,7 @@ def _compute_metrics(
|
|
47
47
|
None: The computed metrics are appended to the `results` dictionary in-place.
|
48
48
|
"""
|
49
49
|
results["Slice"].append(str(region))
|
50
|
-
results["
|
50
|
+
results["Number of Records"].append(df_region.shape[0])
|
51
51
|
results["Feature"].append(feature_column)
|
52
52
|
|
53
53
|
# Check if df_region is an empty dataframe and if so, append 0 to all metrics
|
@@ -222,7 +222,7 @@ def WeakspotsDiagnosis(
|
|
222
222
|
thresholds = thresholds or DEFAULT_THRESHOLDS
|
223
223
|
thresholds = {k.title(): v for k, v in thresholds.items()}
|
224
224
|
|
225
|
-
results_headers = ["Slice", "
|
225
|
+
results_headers = ["Slice", "Number of Records", "Feature"]
|
226
226
|
results_headers.extend(metrics.keys())
|
227
227
|
|
228
228
|
figures = []
|
@@ -236,19 +236,20 @@ def WeakspotsDiagnosis(
|
|
236
236
|
feature_columns
|
237
237
|
+ [datasets[1].target_column, datasets[1].prediction_column(model)]
|
238
238
|
]
|
239
|
-
|
239
|
+
results_1 = pd.DataFrame()
|
240
|
+
results_2 = pd.DataFrame()
|
240
241
|
for feature in feature_columns:
|
241
242
|
bins = 10
|
242
243
|
if feature in datasets[0].feature_columns_categorical:
|
243
244
|
bins = len(df_1[feature].unique())
|
244
245
|
df_1["bin"] = pd.cut(df_1[feature], bins=bins)
|
245
246
|
|
246
|
-
|
247
|
-
|
247
|
+
r1 = {k: [] for k in results_headers}
|
248
|
+
r2 = {k: [] for k in results_headers}
|
248
249
|
|
249
250
|
for region, df_region in df_1.groupby("bin"):
|
250
251
|
_compute_metrics(
|
251
|
-
results=
|
252
|
+
results=r1,
|
252
253
|
metrics=metrics,
|
253
254
|
region=region,
|
254
255
|
df_region=df_region,
|
@@ -260,7 +261,7 @@ def WeakspotsDiagnosis(
|
|
260
261
|
(df_2[feature] > region.left) & (df_2[feature] <= region.right)
|
261
262
|
]
|
262
263
|
_compute_metrics(
|
263
|
-
results=
|
264
|
+
results=r2,
|
264
265
|
metrics=metrics,
|
265
266
|
region=region,
|
266
267
|
df_region=df_2_region,
|
@@ -271,8 +272,8 @@ def WeakspotsDiagnosis(
|
|
271
272
|
|
272
273
|
for metric in metrics.keys():
|
273
274
|
fig, df = _plot_weak_spots(
|
274
|
-
results_1=
|
275
|
-
results_2=
|
275
|
+
results_1=r1,
|
276
|
+
results_2=r2,
|
276
277
|
feature_column=feature,
|
277
278
|
metric=metric,
|
278
279
|
threshold=thresholds[metric],
|
@@ -284,6 +285,8 @@ def WeakspotsDiagnosis(
|
|
284
285
|
# rely on visual assessment for this test for now.
|
285
286
|
if not df[df[list(thresholds.keys())].lt(thresholds).any(axis=1)].empty:
|
286
287
|
passed = False
|
288
|
+
results_1 = pd.concat([results_1, pd.DataFrame(r1)])
|
289
|
+
results_2 = pd.concat([results_2, pd.DataFrame(r2)])
|
287
290
|
|
288
291
|
return (
|
289
292
|
pd.concat(
|
@@ -291,7 +294,9 @@ def WeakspotsDiagnosis(
|
|
291
294
|
pd.DataFrame(results_1).assign(Dataset=datasets[0].input_id),
|
292
295
|
pd.DataFrame(results_2).assign(Dataset=datasets[1].input_id),
|
293
296
|
]
|
294
|
-
)
|
297
|
+
)
|
298
|
+
.reset_index(drop=True)
|
299
|
+
.sort_values(["Feature", "Dataset"]),
|
295
300
|
*figures,
|
296
301
|
passed,
|
297
302
|
)
|
@@ -64,7 +64,11 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil
|
|
64
64
|
|
65
65
|
fig, fig_data = _plot_cumulative_prob(df, dataset.target_column, title)
|
66
66
|
|
67
|
-
return fig, RawData(
|
67
|
+
return fig, RawData(
|
68
|
+
cumulative_probabilities=fig_data,
|
69
|
+
model=model.input_id,
|
70
|
+
dataset=dataset.input_id,
|
71
|
+
)
|
68
72
|
|
69
73
|
|
70
74
|
def _plot_cumulative_prob(df, target_col, title):
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from statsmodels.stats.stattools import durbin_watson
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tasks("regression")
|
@@ -81,4 +81,9 @@ def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]):
|
|
81
81
|
"threshold": [str(threshold)],
|
82
82
|
"autocorrelation": [get_autocorrelation(dw_statistic, threshold)],
|
83
83
|
}
|
84
|
+
), RawData(
|
85
|
+
residuals=residuals,
|
86
|
+
dw_statistic=dw_statistic,
|
87
|
+
model=model.input_id,
|
88
|
+
dataset=dataset.input_id,
|
84
89
|
)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
from sklearn.metrics import roc_auc_score, roc_curve
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("model_performance")
|
@@ -76,4 +76,11 @@ def GINITable(dataset, model):
|
|
76
76
|
"GINI": [gini],
|
77
77
|
"KS": [max(tpr - fpr)],
|
78
78
|
}
|
79
|
+
), RawData(
|
80
|
+
fpr=fpr,
|
81
|
+
tpr=tpr,
|
82
|
+
y_true=y_true,
|
83
|
+
y_prob=y_prob,
|
84
|
+
model=model.input_id,
|
85
|
+
dataset=dataset.input_id,
|
79
86
|
)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from statsmodels.stats.diagnostic import kstest_normal
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.errors import InvalidTestParametersError
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
@@ -66,4 +66,4 @@ def KolmogorovSmirnov(model: VMModel, dataset: VMDataset, dist: str = "norm"):
|
|
66
66
|
"P-Value": result["pvalue"],
|
67
67
|
}
|
68
68
|
for k, result in ks_values.items()
|
69
|
-
]
|
69
|
+
], RawData(ks_values=ks_values, dataset=dataset.input_id)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
from matplotlib import cm
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("visualization", "credit_risk")
|
@@ -60,7 +60,11 @@ def PredictionProbabilitiesHistogram(
|
|
60
60
|
|
61
61
|
fig = _plot_prob_histogram(df, dataset.target_column, title)
|
62
62
|
|
63
|
-
return fig
|
63
|
+
return fig, RawData(
|
64
|
+
probabilities=df["probabilities"],
|
65
|
+
model=model.input_id,
|
66
|
+
dataset=dataset.input_id,
|
67
|
+
)
|
64
68
|
|
65
69
|
|
66
70
|
def _plot_prob_histogram(df, target_col, title):
|
@@ -7,7 +7,7 @@ import pandas as pd
|
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
from scipy import stats
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import SkipTestError
|
12
12
|
|
13
13
|
|
@@ -97,4 +97,10 @@ def RegressionCoeffs(model):
|
|
97
97
|
yaxis_title="Coefficients",
|
98
98
|
)
|
99
99
|
|
100
|
-
return
|
100
|
+
return (
|
101
|
+
fig,
|
102
|
+
coefficients,
|
103
|
+
RawData(
|
104
|
+
model=model.input_id, std_err=std_err, lower_ci=lower_ci, upper_ci=upper_ci
|
105
|
+
),
|
106
|
+
)
|
@@ -7,7 +7,7 @@ from typing import Union
|
|
7
7
|
import matplotlib.pyplot as plt
|
8
8
|
import pandas as pd
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.logging import get_logger
|
12
12
|
from validmind.vm_models import VMDataset, VMModel
|
13
13
|
|
@@ -87,4 +87,9 @@ def RegressionModelForecastPlot(
|
|
87
87
|
|
88
88
|
plt.close()
|
89
89
|
|
90
|
-
return fig
|
90
|
+
return fig, RawData(
|
91
|
+
observed_values=dataset.y.tolist(),
|
92
|
+
forecast_values=dataset.y_pred(model).tolist(),
|
93
|
+
model=model.input_id,
|
94
|
+
dataset=dataset.input_id,
|
95
|
+
)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from sklearn.metrics import mean_squared_error, r2_score
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.vm_models import VMDataset, VMModel
|
9
9
|
|
10
10
|
from .statsutils import adj_r2_score
|
@@ -58,4 +58,6 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
|
|
58
58
|
"MSE": mean_squared_error(y_true, y_pred, squared=True),
|
59
59
|
"RMSE": mean_squared_error(y_true, y_pred, squared=False),
|
60
60
|
}
|
61
|
-
]
|
61
|
+
], RawData(
|
62
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
63
|
+
)
|