validmind 2.7.6__py3-none-any.whl → 2.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +2 -0
- validmind/__version__.py +1 -1
- validmind/api_client.py +8 -1
- validmind/datasets/credit_risk/lending_club.py +3 -4
- validmind/html_templates/content_blocks.py +1 -1
- validmind/tests/__types__.py +17 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
- validmind/tests/data_validation/AutoMA.py +2 -2
- validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
- validmind/tests/data_validation/BoxPierce.py +2 -2
- validmind/tests/data_validation/ClassImbalance.py +2 -1
- validmind/tests/data_validation/DatasetDescription.py +11 -2
- validmind/tests/data_validation/DatasetSplit.py +2 -2
- validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
- validmind/tests/data_validation/HighCardinality.py +9 -2
- validmind/tests/data_validation/HighPearsonCorrelation.py +6 -2
- validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
- validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
- validmind/tests/data_validation/MutualInformation.py +6 -8
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
- validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
- validmind/tests/data_validation/RollingStatsPlot.py +21 -10
- validmind/tests/data_validation/ScatterPlot.py +3 -5
- validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
- validmind/tests/data_validation/SeasonalDecompose.py +12 -2
- validmind/tests/data_validation/Skewness.py +6 -3
- validmind/tests/data_validation/SpreadPlot.py +8 -3
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
- validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
- validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
- validmind/tests/data_validation/WOEBinPlots.py +2 -2
- validmind/tests/data_validation/WOEBinTable.py +11 -9
- validmind/tests/data_validation/nlp/CommonWords.py +2 -2
- validmind/tests/data_validation/nlp/Hashtags.py +2 -2
- validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
- validmind/tests/data_validation/nlp/Mentions.py +9 -6
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
- validmind/tests/data_validation/nlp/Punctuations.py +4 -2
- validmind/tests/data_validation/nlp/Sentiment.py +2 -2
- validmind/tests/data_validation/nlp/StopWords.py +5 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -2
- validmind/tests/data_validation/nlp/Toxicity.py +2 -2
- validmind/tests/model_validation/BertScore.py +2 -2
- validmind/tests/model_validation/BleuScore.py +2 -2
- validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
- validmind/tests/model_validation/ContextualRecall.py +2 -2
- validmind/tests/model_validation/FeaturesAUC.py +2 -2
- validmind/tests/model_validation/MeteorScore.py +2 -2
- validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
- validmind/tests/model_validation/RegardScore.py +6 -2
- validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
- validmind/tests/model_validation/RougeScore.py +6 -5
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
- validmind/tests/model_validation/TokenDisparity.py +2 -2
- validmind/tests/model_validation/ToxicityScore.py +10 -2
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
- validmind/tests/model_validation/embeddings/utils.py +11 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
- validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
- validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
- validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
- validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +3 -1
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +3 -1
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +3 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +3 -1
- validmind/tests/ongoing_monitoring/FeatureDrift.py +1 -0
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +1 -0
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +3 -1
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +1 -0
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +3 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +3 -1
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -3
- validmind/tests/prompt_validation/Bias.py +13 -9
- validmind/tests/prompt_validation/Clarity.py +13 -9
- validmind/tests/prompt_validation/Conciseness.py +13 -9
- validmind/tests/prompt_validation/Delimitation.py +13 -9
- validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
- validmind/tests/prompt_validation/Robustness.py +6 -2
- validmind/tests/prompt_validation/Specificity.py +13 -9
- validmind/tests/run.py +6 -0
- validmind/utils.py +7 -8
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/METADATA +1 -2
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/RECORD +147 -147
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/WHEEL +1 -1
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/LICENSE +0 -0
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py

@@ -5,7 +5,7 @@
 import numpy as np
 from sklearn.metrics.pairwise import cosine_similarity

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel

@@ -61,11 +61,14 @@ def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset):

     table = []

+    cluster_centroids = {}
+
     for cluster_idx in range(num_clusters):
         cluster_data = dataset.x[y_pred == cluster_idx]

         if cluster_data.size != 0:
             cluster_centroid = np.mean(cluster_data, axis=0)
+            cluster_centroids[cluster_idx] = cluster_centroid
             table.append(
                 {
                     "Cluster": cluster_idx,
@@ -81,4 +84,4 @@ def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset):
     if not table:
         raise SkipTestError("No clusters found")

-    return table
+    return table, RawData(cluster_centroids=cluster_centroids)
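The change above is the pattern repeated across most of the modules listed at the top of this diff: import `RawData` from `validmind` and return it alongside the test's existing table or figure output, so intermediate values (centroids, confusion matrices, PSI tables, and so on) are preserved. A minimal sketch of what such a test looks like after the change; the test body and names here are illustrative, not copied from the package:

```python
import numpy as np
import plotly.graph_objects as go

from validmind import RawData, tags, tasks


@tags("visualization")
@tasks("clustering")
def ExampleCentroidTest(model, dataset):
    # Hypothetical test: per-cluster centroids plus a bar chart of their norms.
    y_pred = dataset.y_pred(model)
    centroids = {
        idx: np.mean(dataset.x[y_pred == idx], axis=0) for idx in np.unique(y_pred)
    }
    fig = go.Figure(
        data=[
            go.Bar(
                x=list(centroids),
                y=[float(np.linalg.norm(c)) for c in centroids.values()],
            )
        ]
    )
    # The figure is still the displayable result; RawData carries the raw values.
    return fig, RawData(cluster_centroids=centroids)
```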
validmind/tests/model_validation/sklearn/ConfusionMatrix.py

@@ -7,7 +7,7 @@ import numpy as np
 import plotly.figure_factory as ff
 from sklearn.metrics import confusion_matrix

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel


@@ -119,4 +119,4 @@ def ConfusionMatrix(dataset: VMDataset, model: VMModel):
         font=dict(size=14),
     )

-    return fig
+    return fig, RawData(confusion_matrix=cm)
validmind/tests/model_validation/sklearn/FeatureImportance.py

@@ -52,8 +52,6 @@ def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3)
     - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but
     can be adjusted.
     """
-    results_list = []
-
     pfi_values = permutation_importance(
         estimator=model.model,
         X=dataset.x_df(),
@@ -61,8 +59,6 @@ def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3)
         random_state=0,
         n_jobs=-2,
     )
-
-    # Create a dictionary to store PFI scores
    pfi = {
         column: pfi_values["importances_mean"][i]
         for i, column in enumerate(dataset.feature_columns)
@@ -70,14 +66,10 @@ def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3)

     # Sort features by their importance
     sorted_features = sorted(pfi.items(), key=lambda item: item[1], reverse=True)
-
-    # Extract the top `num_features` features
     top_features = sorted_features[:num_features]

-    # Prepare the result for the current model and dataset
     result = {}

-    # Dynamically add feature columns to the result
     for i in range(num_features):
         if i < len(top_features):
             result[
@@ -86,9 +78,4 @@ def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3)
         else:
             result[f"Feature {i + 1}"] = None

-
-    results_list.append(result)
-
-    # Convert the results list to a DataFrame
-    results_df = pd.DataFrame(results_list)
-    return results_df
+    return pd.DataFrame([result])
validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -2,9 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from typing import
-
+from typing import Dict, List, Union
+
 from sklearn.metrics import make_scorer, recall_score
+from sklearn.model_selection import GridSearchCV

 from validmind import tags, tasks
 from validmind.vm_models import VMDataset, VMModel

@@ -24,7 +25,9 @@ def _get_metrics(scoring):
     return (
         scoring
         if isinstance(scoring, list)
-        else list(scoring.keys())
+        else list(scoring.keys())
+        if isinstance(scoring, dict)
+        else [scoring]
     )

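The `_get_metrics` change extends the helper so a single metric name is accepted in addition to a list or a dict of scorers. A standalone sketch of the resulting behavior (same shape as the hunk, shown here outside the package):

```python
def _get_metrics(scoring):
    # Normalize `scoring` (str, list, or dict) into a list of metric names.
    return (
        scoring
        if isinstance(scoring, list)
        else list(scoring.keys())
        if isinstance(scoring, dict)
        else [scoring]
    )


print(_get_metrics("recall"))                 # ['recall']
print(_get_metrics(["recall", "precision"]))  # ['recall', 'precision']
print(_get_metrics({"rec": "recall"}))        # ['rec']
```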
validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py

@@ -11,7 +11,7 @@ from scipy.spatial.distance import cdist
 from sklearn import clone
 from sklearn.metrics import silhouette_score

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel

@@ -124,4 +124,4 @@ def KMeansClustersOptimization(

     fig.update_layout(showlegend=False)

-    return fig
+    return fig, RawData(distortions=distortions, silhouette_avg=silhouette_avg)
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py

@@ -6,7 +6,7 @@ import numpy as np
 from sklearn.metrics import roc_auc_score
 from sklearn.preprocessing import LabelBinarizer

-from validmind
+from validmind import tags, tasks
 from validmind.vm_models import VMDataset, VMModel


@@ -62,14 +62,18 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
         lb = LabelBinarizer()
         lb.fit(y_true)

+        y_true_binarized = lb.transform(y_true)
+        y_score_binarized = lb.transform(dataset.y_pred(model))
+
         roc_auc = roc_auc_score(
-            y_true=
-            y_score=
+            y_true=y_true_binarized,
+            y_score=y_score_binarized,
             average="macro",
         )

     else:
-
+        y_score_prob = dataset.y_prob(model)
+        roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)

     return [
         {
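The second hunk makes the multiclass branch explicit: labels and predictions are both passed through the fitted `LabelBinarizer` before the macro-averaged ROC AUC is computed. A small self-contained example of that computation with scikit-learn; the sample data is made up:

```python
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

y_true = np.array(["a", "b", "c", "a", "b", "c"])
y_pred = np.array(["a", "b", "c", "a", "c", "b"])

lb = LabelBinarizer()
lb.fit(y_true)

y_true_binarized = lb.transform(y_true)   # shape (n_samples, n_classes)
y_score_binarized = lb.transform(y_pred)  # hard predictions used as scores

roc_auc = roc_auc_score(
    y_true=y_true_binarized,
    y_score=y_score_binarized,
    average="macro",
)
print(roc_auc)
```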
validmind/tests/model_validation/sklearn/OverfitDiagnosis.py

@@ -242,7 +242,7 @@ def OverfitDiagnosis(
     test_df[prob_column] = datasets[1].y_prob(model)

     test_results = []
-
+    figures = []
     results_headers = ["slice", "shape", "feature", metric]

     for feature_column in datasets[0].feature_columns:
@@ -283,7 +283,7 @@ def OverfitDiagnosis(
         )

         results = _prepare_results(results_train, results_test, metric)
-
+        figures.append(
             _plot_overfit_regions(results, feature_column, cut_off_threshold, metric)
         )

@@ -299,4 +299,4 @@ def OverfitDiagnosis(
             }
         )

-    return {"Overfit Diagnosis": test_results}, *
+    return ({"Overfit Diagnosis": test_results}, *figures)
validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py

@@ -7,7 +7,7 @@ from typing import Union
 import plotly.graph_objects as go
 from sklearn.inspection import permutation_importance

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel

@@ -111,4 +111,4 @@ def PermutationFeatureImportance(
         height=figure_height,
     )

-    return fig
+    return fig, RawData(permutation_importance=pfi_values)
validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py

@@ -8,7 +8,7 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel

@@ -192,18 +192,22 @@ def PopulationStabilityIndex(

     table_title = f"Population Stability Index for {datasets[0].input_id} and {datasets[1].input_id} Datasets"

-    return
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    return (
+        {
+            table_title: [
+                {
+                    "Bin": (
+                        i if i < (len(psi_results) - 1) else "Total"
+                    ),  # The last bin is the "Total" bin
+                    "Count Initial": values["initial"],
+                    "Percent Initial (%)": values["percent_initial"] * 100,
+                    "Count New": values["new"],
+                    "Percent New (%)": values["percent_new"] * 100,
+                    "PSI": values["psi"],
+                }
+                for i, values in enumerate(psi_results)
+            ],
+        },
+        fig,
+        RawData(psi_raw=psi_results),
+    )
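For reference, the per-bin `psi` values rendered in this table are conventionally defined as (percent_new - percent_initial) * ln(percent_new / percent_initial); whether the package adds smoothing or other edge-case handling is not visible in this hunk. A sketch of that textbook calculation:

```python
import numpy as np


def psi_per_bin(percent_initial, percent_new, eps=1e-6):
    # Textbook PSI contribution per bin; eps guards against empty bins.
    p0 = np.clip(np.asarray(percent_initial, dtype=float), eps, None)
    p1 = np.clip(np.asarray(percent_new, dtype=float), eps, None)
    return (p1 - p0) * np.log(p1 / p0)


percent_initial = np.array([0.25, 0.25, 0.25, 0.25])
percent_new = np.array([0.10, 0.20, 0.30, 0.40])

contributions = psi_per_bin(percent_initial, percent_new)
print(contributions)        # per-bin PSI
print(contributions.sum())  # total PSI across bins
```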
validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py

@@ -6,7 +6,7 @@ import numpy as np
 import plotly.graph_objects as go
 from sklearn.metrics import precision_recall_curve

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.models import FoundationModel
 from validmind.vm_models import VMDataset, VMModel

@@ -66,7 +66,7 @@ def PrecisionRecallCurve(model: VMModel, dataset: VMDataset):

     precision, recall, _ = precision_recall_curve(y_true, dataset.y_prob(model))

-
+    fig = go.Figure(
         data=[
             go.Scatter(
                 x=recall,
@@ -82,3 +82,5 @@ def PrecisionRecallCurve(model: VMModel, dataset: VMDataset):
             yaxis=dict(title="Precision"),
         ),
     )
+
+    return fig, RawData(precision=precision, recall=recall)
validmind/tests/model_validation/sklearn/ROCCurve.py

@@ -78,7 +78,6 @@ def ROCCurve(model: VMModel, dataset: VMDataset):
     auc = roc_auc_score(y_true, y_prob)

     return (
-        RawData(fpr=fpr, tpr=tpr, auc=auc),
         go.Figure(
             data=[
                 go.Scatter(
@@ -104,4 +103,5 @@ def ROCCurve(model: VMModel, dataset: VMDataset):
                 height=500,
             ),
         ),
+        RawData(fpr=fpr, tpr=tpr, auc=auc),
     )
validmind/tests/model_validation/sklearn/RegressionR2Square.py

@@ -51,17 +51,15 @@ def RegressionR2Square(dataset, model):
     violated.
     - Does not provide insight on whether the correct regression model was used or if key assumptions have been met.
     """
-
     y_true = dataset.y
     y_pred = dataset.y_pred(model)
     y_true = y_true.astype(y_pred.dtype)

-
-
-
-
-
-
+    return pd.DataFrame(
+        {
+            "R-squared (R2) Score": [metrics.r2_score(y_true, y_pred)],
+            "Adjusted R-squared (R2) Score": [
+                adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
+            ],
+        }
     )
-
-    return results_df
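`adj_r2_score` is called with the number of observations and the number of features; assuming it implements the standard adjustment, the relationship to the plain R2 is 1 - (1 - R2) * (n - 1) / (n - p - 1). A sketch under that assumption (the helper name mirrors the one used above but is defined locally here):

```python
from sklearn.metrics import r2_score


def adj_r2_score(y_true, y_pred, n_obs, n_features):
    # Standard adjusted R^2: penalizes R^2 for the number of predictors.
    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)


y_true = [3.0, 2.5, 4.0, 5.1, 3.3]
y_pred = [2.8, 2.9, 3.8, 4.9, 3.5]

print(r2_score(y_true, y_pred))
print(adj_r2_score(y_true, y_pred, len(y_true), 2))
```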
validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py

@@ -9,7 +9,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import shap

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import UnsupportedModelForSHAPError
 from validmind.logging import get_logger
 from validmind.models import CatBoostModel, SKlearnModel, StatsModelsModel

@@ -229,4 +229,5 @@ def SHAPGlobalImportance(
     return (
         generate_shap_plot("mean", shap_values, shap_sample),
         generate_shap_plot("summary", shap_values, shap_sample),
+        RawData(shap_values=shap_values, shap_sample=shap_sample),
     )
validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py

@@ -4,8 +4,9 @@

 import pandas as pd
 import plotly.graph_objects as go
+
 from validmind import tags, tasks
-from validmind.vm_models import
+from validmind.vm_models import VMDataset, VMModel


 @tags("visualization", "credit_risk", "calibration")
validmind/tests/model_validation/sklearn/SilhouettePlot.py

@@ -6,7 +6,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 from sklearn.metrics import silhouette_samples, silhouette_score

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel


@@ -105,8 +105,10 @@ def SilhouettePlot(model: VMModel, dataset: VMDataset):

     plt.close()

-    return
+    return (
         {
             "Silhouette Score": silhouette_avg,
         },
-
+        fig,
+        RawData(sample_silhouette_values=sample_silhouette_values),
+    )
validmind/tests/model_validation/sklearn/TrainingTestDegradation.py

@@ -7,6 +7,7 @@ from typing import List
 from numpy import unique
 from sklearn.metrics import classification_report

+from validmind import RawData
 from validmind.tests import tags, tasks
 from validmind.vm_models import VMDataset, VMModel

@@ -98,4 +99,11 @@ def TrainingTestDegradation(
             }
         )

-    return
+    return (
+        table,
+        all(row["Pass/Fail"] == "Pass" for row in table),
+        RawData(
+            dataset_1_report=ds1_report,
+            dataset_2_report=ds2_report,
+        ),
+    )
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py

@@ -6,7 +6,7 @@ import numpy as np
 import plotly.graph_objects as go
 from matplotlib import cm

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks


 @tags("visualization", "credit_risk")
@@ -62,9 +62,9 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil
     df = dataset.df
     df["probabilities"] = dataset.y_prob(model)

-    fig = _plot_cumulative_prob(df, dataset.target_column, title)
+    fig, fig_data = _plot_cumulative_prob(df, dataset.target_column, title)

-    return fig
+    return fig, RawData(cumulative_probabilities=fig_data)


 def _plot_cumulative_prob(df, target_col, title):
@@ -82,10 +82,17 @@ def _plot_cumulative_prob(df, target_col, title):
         cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
         for cls, rgb in zip(classes, colors)
     }
+
+    raw_data = {}
+
     for class_value in sorted(df[target_col].unique()):
         # Calculate cumulative distribution for the current class
         sorted_probs = np.sort(df[df[target_col] == class_value]["probabilities"])
         cumulative_probs = np.cumsum(sorted_probs) / np.sum(sorted_probs)
+        raw_data[class_value] = {
+            "sorted_probs": sorted_probs,
+            "cumulative_probs": cumulative_probs,
+        }

         fig.add_trace(
             go.Scatter(
@@ -104,4 +111,4 @@ def _plot_cumulative_prob(df, target_col, title):
         yaxis_title="Cumulative Distribution",
     )

-    return fig
+    return fig, raw_data
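The values now captured per class are exactly the series used for the plot traces: the sorted probabilities and their normalized cumulative sum. A minimal illustration of that calculation for a single class, with made-up probabilities:

```python
import numpy as np

probabilities = np.array([0.7, 0.2, 0.9, 0.4])

sorted_probs = np.sort(probabilities)
cumulative_probs = np.cumsum(sorted_probs) / np.sum(sorted_probs)

print(sorted_probs)       # [0.2 0.4 0.7 0.9]
print(cumulative_probs)   # [0.0909... 0.2727... 0.5909... 1.0]
```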
validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py

@@ -75,12 +75,10 @@ def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]):
         else:
             return "No autocorrelation"

-
+    return pd.DataFrame(
         {
             "dw_statistic": [dw_statistic],
             "threshold": [str(threshold)],
             "autocorrelation": [get_autocorrelation(dw_statistic, threshold)],
         }
     )
-
-    return results
validmind/tests/model_validation/statsmodels/GINITable.py

@@ -61,27 +61,19 @@ def GINITable(dataset, model):
     - The test does not incorporate a method to efficiently handle missing or inefficiently processed data, which could
     lead to inaccuracies in the metrics if the data is not appropriately preprocessed.
     """
-
-    metrics_dict = {"AUC": [], "GINI": [], "KS": []}
-
-    # Retrieve y_true and y_pred for the current dataset
     y_true = np.ravel(dataset.y)  # Flatten y_true to make it one-dimensional
     y_prob = dataset.y_prob(model)
-
-    # Compute metrics
     y_true = np.array(y_true, dtype=float)
     y_prob = np.array(y_prob, dtype=float)

     fpr, tpr, _ = roc_curve(y_true, y_prob)
-    ks = max(tpr - fpr)
     auc = roc_auc_score(y_true, y_prob)
     gini = 2 * auc - 1

-
-
-
-
-
-
-
-    return metrics_df
+    return pd.DataFrame(
+        {
+            "AUC": [auc],
+            "GINI": [gini],
+            "KS": [max(tpr - fpr)],
+        }
+    )
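The simplified body relies on two standard identities that also appear verbatim in the hunk: Gini = 2 * AUC - 1, and the KS statistic is the maximum gap between the TPR and FPR curves. A compact illustration with toy data:

```python
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

y_true = np.array([0, 0, 1, 1, 0, 1, 0, 1], dtype=float)
y_prob = np.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7, 0.5, 0.9])

fpr, tpr, _ = roc_curve(y_true, y_prob)
auc = roc_auc_score(y_true, y_prob)

print("AUC:", auc)
print("GINI:", 2 * auc - 1)   # Gini = 2*AUC - 1
print("KS:", max(tpr - fpr))  # largest vertical gap between TPR and FPR
```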
validmind/tests/model_validation/statsmodels/Lilliefors.py

@@ -5,12 +5,12 @@
 from statsmodels.stats.diagnostic import lilliefors

 from validmind import tags, tasks
-from validmind.vm_models import VMDataset
+from validmind.vm_models import VMDataset


 @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
 @tasks("classification", "regression")
-def Lilliefors(
+def Lilliefors(dataset: VMDataset):
     """
     Assesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test.

validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py

@@ -5,7 +5,7 @@
 import matplotlib.pyplot as plt
 import seaborn as sns

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.logging import get_logger
 from validmind.vm_models import VMModel

@@ -90,4 +90,4 @@ def RegressionFeatureSignificance(

     plt.close()

-    return fig
+    return fig, RawData(coefficients=coefficients, pvalues=pvalues)
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py

@@ -6,7 +6,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel


@@ -91,4 +91,7 @@ def RegressionModelForecastPlotLevels(

     plt.close()

-    return fig
+    return fig, RawData(
+        y_transformed=dataset_y_transformed,
+        y_pred_transformed=y_pred_transformed,
+    )
validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py

@@ -7,7 +7,7 @@ from typing import List, Union
 import matplotlib.pyplot as plt
 import numpy as np

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel

@@ -111,4 +111,7 @@ def RegressionModelSensitivityPlot(

     plt.close()

-    return fig
+    return fig, RawData(
+        transformed_target=transformed_target,
+        transformed_predictions=transformed_predictions,
+    )
validmind/tests/model_validation/statsmodels/RegressionModelSummary.py

@@ -45,17 +45,17 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
     - A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
     overfitting.
     """
-    y_true = dataset.y
-    y_pred = dataset.y_pred(model)
-
     return [
         {
             "Independent Variables": dataset.feature_columns,
-            "R-Squared": r2_score(
+            "R-Squared": r2_score(dataset.y, dataset.y_pred(model)),
             "Adjusted R-Squared": adj_r2_score(
-
+                dataset.y,
+                dataset.y_pred(model),
+                len(dataset.y),
+                len(dataset.feature_columns),
             ),
-            "MSE": mean_squared_error(
-            "RMSE": mean_squared_error(
+            "MSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=True),
+            "RMSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=False),
         }
     ]
validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py

@@ -8,7 +8,7 @@ import plotly.graph_objects as go
 from sklearn.metrics import r2_score
 from sklearn.utils import check_random_state

-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel

@@ -97,4 +97,4 @@ def RegressionPermutationFeatureImportance(
         height=figure_height,
     )

-    return fig
+    return fig, RawData(importances=importances)
validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py

@@ -2,11 +2,13 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+from typing import List
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from sklearn.calibration import calibration_curve
-
+
 from validmind import tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel
validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py

@@ -2,12 +2,14 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+from typing import List
+
 import numpy as np
 import pandas as pd
+from scipy import stats
 from sklearn.metrics import roc_auc_score
 from sklearn.preprocessing import LabelBinarizer
-
-from typing import List
+
 from validmind import tags, tasks
 from validmind.vm_models import VMDataset, VMModel
