validmind 2.7.12__py3-none-any.whl → 2.8.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +58 -10
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +17 -73
- validmind/api_client.py +18 -1
- validmind/models/r_model.py +5 -1
- validmind/tests/comparison.py +28 -2
- validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
- validmind/tests/data_validation/AutoMA.py +1 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
- validmind/tests/data_validation/BoxPierce.py +3 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +1 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +5 -10
- validmind/tests/data_validation/HighCardinality.py +5 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
- validmind/tests/data_validation/IQROutliersTable.py +5 -2
- validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
- validmind/tests/data_validation/JarqueBera.py +2 -2
- validmind/tests/data_validation/LJungBox.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/MissingValues.py +14 -10
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +2 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
- validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
- validmind/tests/data_validation/RollingStatsPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
- validmind/tests/data_validation/SeasonalDecompose.py +1 -1
- validmind/tests/data_validation/ShapiroWilk.py +2 -2
- validmind/tests/data_validation/SpreadPlot.py +1 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/TextDescription.py +1 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/model_validation/BertScore.py +7 -1
- validmind/tests/model_validation/BleuScore.py +7 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
- validmind/tests/model_validation/ContextualRecall.py +9 -1
- validmind/tests/model_validation/FeaturesAUC.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +7 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
- validmind/tests/model_validation/RegardScore.py +6 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
- validmind/tests/model_validation/RougeScore.py +3 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
- validmind/tests/model_validation/TokenDisparity.py +5 -1
- validmind/tests/model_validation/ToxicityScore.py +3 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/utils.py +6 -9
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +1 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +1 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +1 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +1 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +1 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +1 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +1 -1
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +1 -1
- validmind/tests/model_validation/ragas/utils.py +8 -7
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +15 -2
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
- validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
- validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +18 -7
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +8 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +6 -1
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +11 -9
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
- validmind/tests/prompt_validation/Clarity.py +1 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
- validmind/tests/prompt_validation/Robustness.py +6 -1
- validmind/tests/prompt_validation/Specificity.py +1 -1
- validmind/tests/prompt_validation/ai_powered_test.py +5 -4
- validmind/tests/run.py +5 -1
- validmind/utils.py +13 -0
- validmind/vm_models/result/result.py +43 -2
- {validmind-2.7.12.dist-info → validmind-2.8.12.dist-info}/METADATA +3 -2
- {validmind-2.7.12.dist-info → validmind-2.8.12.dist-info}/RECORD +158 -163
- validmind/ai/test_result_description/config.yaml +0 -29
- validmind/ai/test_result_description/context.py +0 -73
- validmind/ai/test_result_description/image_processing.py +0 -124
- validmind/ai/test_result_description/system.jinja +0 -39
- validmind/ai/test_result_description/user.jinja +0 -30
- {validmind-2.7.12.dist-info → validmind-2.8.12.dist-info}/LICENSE +0 -0
- {validmind-2.7.12.dist-info → validmind-2.8.12.dist-info}/WHEEL +0 -0
- {validmind-2.7.12.dist-info → validmind-2.8.12.dist-info}/entry_points.txt +0 -0
@@ -10,7 +10,7 @@ import pandas as pd
|
|
10
10
|
import seaborn as sns
|
11
11
|
from sklearn import metrics
|
12
12
|
|
13
|
-
from validmind import tags, tasks
|
13
|
+
from validmind import RawData, tags, tasks
|
14
14
|
from validmind.logging import get_logger
|
15
15
|
from validmind.vm_models import VMDataset, VMModel
|
16
16
|
|
@@ -299,4 +299,10 @@ def OverfitDiagnosis(
|
|
299
299
|
}
|
300
300
|
)
|
301
301
|
|
302
|
-
return (
|
302
|
+
return (
|
303
|
+
{"Overfit Diagnosis": test_results},
|
304
|
+
*figures,
|
305
|
+
RawData(
|
306
|
+
model=model.input_id, datasets=[dataset.input_id for dataset in datasets]
|
307
|
+
),
|
308
|
+
)
|
@@ -83,4 +83,9 @@ def PrecisionRecallCurve(model: VMModel, dataset: VMDataset):
|
|
83
83
|
),
|
84
84
|
)
|
85
85
|
|
86
|
-
return fig, RawData(
|
86
|
+
return fig, RawData(
|
87
|
+
precision=precision,
|
88
|
+
recall=recall,
|
89
|
+
model=model.input_id,
|
90
|
+
dataset=dataset.input_id,
|
91
|
+
)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
from sklearn import metrics
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("sklearn", "model_performance")
|
@@ -62,7 +62,11 @@ def RegressionErrors(model, dataset):
|
|
62
62
|
y_pred = dataset.y_pred(model)
|
63
63
|
y_true = y_true.astype(y_pred.dtype)
|
64
64
|
|
65
|
-
|
65
|
+
results_df = _regression_errors(y_true, y_pred)
|
66
|
+
|
67
|
+
return results_df, RawData(
|
68
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
69
|
+
)
|
66
70
|
|
67
71
|
|
68
72
|
def _regression_errors(y_true, y_pred):
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import numpy as np
|
6
6
|
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.logging import get_logger
|
10
10
|
from validmind.vm_models import VMDataset, VMModel
|
11
11
|
|
@@ -74,10 +74,15 @@ def RegressionPerformance(model: VMModel, dataset: VMDataset):
|
|
74
74
|
# MBD calculation
|
75
75
|
metrics["Mean Bias Deviation (MBD)"] = np.mean(y_pred - y_true)
|
76
76
|
|
77
|
-
return
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
77
|
+
return (
|
78
|
+
[
|
79
|
+
{
|
80
|
+
"Metric": metric,
|
81
|
+
"Value": value,
|
82
|
+
}
|
83
|
+
for metric, value in metrics.items()
|
84
|
+
],
|
85
|
+
RawData(
|
86
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
87
|
+
),
|
88
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from sklearn import metrics
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
|
10
10
|
|
11
11
|
|
@@ -55,11 +55,14 @@ def RegressionR2Square(dataset, model):
|
|
55
55
|
y_pred = dataset.y_pred(model)
|
56
56
|
y_true = y_true.astype(y_pred.dtype)
|
57
57
|
|
58
|
+
r2 = metrics.r2_score(y_true, y_pred)
|
59
|
+
adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
|
60
|
+
|
58
61
|
return pd.DataFrame(
|
59
62
|
{
|
60
|
-
"R-squared (R2) Score": [
|
61
|
-
"Adjusted R-squared (R2) Score": [
|
62
|
-
adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
|
63
|
-
],
|
63
|
+
"R-squared (R2) Score": [r2],
|
64
|
+
"Adjusted R-squared (R2) Score": [adj_r2],
|
64
65
|
}
|
66
|
+
), RawData(
|
67
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
65
68
|
)
|
@@ -229,5 +229,10 @@ def SHAPGlobalImportance(
|
|
229
229
|
return (
|
230
230
|
generate_shap_plot("mean", shap_values, shap_sample),
|
231
231
|
generate_shap_plot("summary", shap_values, shap_sample),
|
232
|
-
RawData(
|
232
|
+
RawData(
|
233
|
+
shap_values=shap_values,
|
234
|
+
shap_sample=shap_sample,
|
235
|
+
model=model.input_id,
|
236
|
+
dataset=dataset.input_id,
|
237
|
+
),
|
233
238
|
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
|
@@ -128,4 +128,12 @@ def ScoreProbabilityAlignment(
|
|
128
128
|
height=600,
|
129
129
|
)
|
130
130
|
|
131
|
-
|
131
|
+
# Include raw data for post-processing
|
132
|
+
raw_data = RawData(
|
133
|
+
score_bins=df[["score_bin", score_column]],
|
134
|
+
predicted_probabilities=df["probability"],
|
135
|
+
model=model.input_id,
|
136
|
+
dataset=dataset.input_id,
|
137
|
+
)
|
138
|
+
|
139
|
+
return results_df, fig, raw_data
|
@@ -110,5 +110,9 @@ def SilhouettePlot(model: VMModel, dataset: VMDataset):
|
|
110
110
|
"Silhouette Score": silhouette_avg,
|
111
111
|
},
|
112
112
|
fig,
|
113
|
-
RawData(
|
113
|
+
RawData(
|
114
|
+
sample_silhouette_values=sample_silhouette_values,
|
115
|
+
model=model.input_id,
|
116
|
+
dataset=dataset.input_id,
|
117
|
+
),
|
114
118
|
)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from sklearn import metrics
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.vm_models import VMDataset, VMModel
|
9
9
|
|
10
10
|
|
@@ -48,11 +48,14 @@ def VMeasure(dataset: VMDataset, model: VMModel):
|
|
48
48
|
the other. The V Measure Score does not provide flexibility in assigning different weights to homogeneity and
|
49
49
|
completeness.
|
50
50
|
"""
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
}
|
58
|
-
|
51
|
+
v_measure = metrics.v_measure_score(
|
52
|
+
labels_true=dataset.y,
|
53
|
+
labels_pred=dataset.y_pred(model),
|
54
|
+
)
|
55
|
+
|
56
|
+
return (
|
57
|
+
[{"V Measure": v_measure}],
|
58
|
+
RawData(
|
59
|
+
v_measure_score=v_measure, model=model.input_id, dataset=dataset.input_id
|
60
|
+
),
|
61
|
+
)
|
@@ -64,7 +64,11 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil
|
|
64
64
|
|
65
65
|
fig, fig_data = _plot_cumulative_prob(df, dataset.target_column, title)
|
66
66
|
|
67
|
-
return fig, RawData(
|
67
|
+
return fig, RawData(
|
68
|
+
cumulative_probabilities=fig_data,
|
69
|
+
model=model.input_id,
|
70
|
+
dataset=dataset.input_id,
|
71
|
+
)
|
68
72
|
|
69
73
|
|
70
74
|
def _plot_cumulative_prob(df, target_col, title):
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from statsmodels.stats.stattools import durbin_watson
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tasks("regression")
|
@@ -81,4 +81,9 @@ def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]):
|
|
81
81
|
"threshold": [str(threshold)],
|
82
82
|
"autocorrelation": [get_autocorrelation(dw_statistic, threshold)],
|
83
83
|
}
|
84
|
+
), RawData(
|
85
|
+
residuals=residuals,
|
86
|
+
dw_statistic=dw_statistic,
|
87
|
+
model=model.input_id,
|
88
|
+
dataset=dataset.input_id,
|
84
89
|
)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
from sklearn.metrics import roc_auc_score, roc_curve
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("model_performance")
|
@@ -76,4 +76,11 @@ def GINITable(dataset, model):
|
|
76
76
|
"GINI": [gini],
|
77
77
|
"KS": [max(tpr - fpr)],
|
78
78
|
}
|
79
|
+
), RawData(
|
80
|
+
fpr=fpr,
|
81
|
+
tpr=tpr,
|
82
|
+
y_true=y_true,
|
83
|
+
y_prob=y_prob,
|
84
|
+
model=model.input_id,
|
85
|
+
dataset=dataset.input_id,
|
79
86
|
)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from statsmodels.stats.diagnostic import kstest_normal
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.errors import InvalidTestParametersError
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
@@ -66,4 +66,4 @@ def KolmogorovSmirnov(model: VMModel, dataset: VMDataset, dist: str = "norm"):
|
|
66
66
|
"P-Value": result["pvalue"],
|
67
67
|
}
|
68
68
|
for k, result in ks_values.items()
|
69
|
-
]
|
69
|
+
], RawData(ks_values=ks_values, dataset=dataset.input_id)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
from matplotlib import cm
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("visualization", "credit_risk")
|
@@ -60,7 +60,11 @@ def PredictionProbabilitiesHistogram(
|
|
60
60
|
|
61
61
|
fig = _plot_prob_histogram(df, dataset.target_column, title)
|
62
62
|
|
63
|
-
return fig
|
63
|
+
return fig, RawData(
|
64
|
+
probabilities=df["probabilities"],
|
65
|
+
model=model.input_id,
|
66
|
+
dataset=dataset.input_id,
|
67
|
+
)
|
64
68
|
|
65
69
|
|
66
70
|
def _plot_prob_histogram(df, target_col, title):
|
@@ -7,7 +7,7 @@ import pandas as pd
|
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
from scipy import stats
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import SkipTestError
|
12
12
|
|
13
13
|
|
@@ -97,4 +97,10 @@ def RegressionCoeffs(model):
|
|
97
97
|
yaxis_title="Coefficients",
|
98
98
|
)
|
99
99
|
|
100
|
-
return
|
100
|
+
return (
|
101
|
+
fig,
|
102
|
+
coefficients,
|
103
|
+
RawData(
|
104
|
+
model=model.input_id, std_err=std_err, lower_ci=lower_ci, upper_ci=upper_ci
|
105
|
+
),
|
106
|
+
)
|
@@ -7,7 +7,7 @@ from typing import Union
|
|
7
7
|
import matplotlib.pyplot as plt
|
8
8
|
import pandas as pd
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.logging import get_logger
|
12
12
|
from validmind.vm_models import VMDataset, VMModel
|
13
13
|
|
@@ -87,4 +87,9 @@ def RegressionModelForecastPlot(
|
|
87
87
|
|
88
88
|
plt.close()
|
89
89
|
|
90
|
-
return fig
|
90
|
+
return fig, RawData(
|
91
|
+
observed_values=dataset.y.tolist(),
|
92
|
+
forecast_values=dataset.y_pred(model).tolist(),
|
93
|
+
model=model.input_id,
|
94
|
+
dataset=dataset.input_id,
|
95
|
+
)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from sklearn.metrics import mean_squared_error, r2_score
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.vm_models import VMDataset, VMModel
|
9
9
|
|
10
10
|
from .statsutils import adj_r2_score
|
@@ -45,17 +45,19 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
|
|
45
45
|
- A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
|
46
46
|
overfitting.
|
47
47
|
"""
|
48
|
+
y_true = dataset.y
|
49
|
+
y_pred = dataset.y_pred(model)
|
50
|
+
|
48
51
|
return [
|
49
52
|
{
|
50
53
|
"Independent Variables": dataset.feature_columns,
|
51
|
-
"R-Squared": r2_score(
|
54
|
+
"R-Squared": r2_score(y_true, y_pred),
|
52
55
|
"Adjusted R-Squared": adj_r2_score(
|
53
|
-
dataset.
|
54
|
-
dataset.y_pred(model),
|
55
|
-
len(dataset.y),
|
56
|
-
len(dataset.feature_columns),
|
56
|
+
y_true, y_pred, len(y_true), len(dataset.feature_columns)
|
57
57
|
),
|
58
|
-
"MSE": mean_squared_error(
|
59
|
-
"RMSE": mean_squared_error(
|
58
|
+
"MSE": mean_squared_error(y_true, y_pred, squared=True),
|
59
|
+
"RMSE": mean_squared_error(y_true, y_pred, squared=False),
|
60
60
|
}
|
61
|
-
]
|
61
|
+
], RawData(
|
62
|
+
y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
|
63
|
+
)
|
@@ -9,7 +9,7 @@ import pandas as pd
|
|
9
9
|
import plotly.graph_objects as go
|
10
10
|
from sklearn.calibration import calibration_curve
|
11
11
|
|
12
|
-
from validmind import tags, tasks
|
12
|
+
from validmind import RawData, tags, tasks
|
13
13
|
from validmind.errors import SkipTestError
|
14
14
|
from validmind.vm_models import VMDataset, VMModel
|
15
15
|
|
@@ -217,4 +217,14 @@ def CalibrationCurveDrift(
|
|
217
217
|
fig,
|
218
218
|
{"Mean Predicted Probabilities": pred_df, "Fraction of Positives": true_df},
|
219
219
|
pass_fail_bool,
|
220
|
+
RawData(
|
221
|
+
prob_true_ref=prob_true_ref,
|
222
|
+
prob_pred_ref=prob_pred_ref,
|
223
|
+
prob_true_mon=prob_true_mon,
|
224
|
+
prob_pred_mon=prob_pred_mon,
|
225
|
+
bin_labels=bin_labels,
|
226
|
+
model=model.input_id,
|
227
|
+
dataset_ref=datasets[0].input_id,
|
228
|
+
dataset_mon=datasets[1].input_id,
|
229
|
+
),
|
220
230
|
)
|
@@ -8,7 +8,7 @@ import numpy as np
|
|
8
8
|
import pandas as pd
|
9
9
|
from sklearn.metrics import classification_report
|
10
10
|
|
11
|
-
from validmind import tags, tasks
|
11
|
+
from validmind import RawData, tags, tasks
|
12
12
|
from validmind.vm_models import VMDataset, VMModel
|
13
13
|
|
14
14
|
|
@@ -145,4 +145,12 @@ def ClassificationAccuracyDrift(
|
|
145
145
|
# Calculate overall pass/fail
|
146
146
|
pass_fail_bool = (df["Pass/Fail"] == "Pass").all()
|
147
147
|
|
148
|
-
|
148
|
+
raw_data = RawData(
|
149
|
+
report_reference=report_ref,
|
150
|
+
report_monitoring=report_mon,
|
151
|
+
model=model.input_id,
|
152
|
+
dataset_reference=datasets[0].input_id,
|
153
|
+
dataset_monitoring=datasets[1].input_id,
|
154
|
+
)
|
155
|
+
|
156
|
+
return ({"Classification Accuracy Metrics": df}, pass_fail_bool, raw_data)
|
@@ -8,7 +8,7 @@ import numpy as np
|
|
8
8
|
import pandas as pd
|
9
9
|
from sklearn.metrics import confusion_matrix
|
10
10
|
|
11
|
-
from validmind import tags, tasks
|
11
|
+
from validmind import RawData, tags, tasks
|
12
12
|
from validmind.vm_models import VMDataset, VMModel
|
13
13
|
|
14
14
|
|
@@ -190,4 +190,11 @@ def ConfusionMatrixDrift(
|
|
190
190
|
return (
|
191
191
|
{"Confusion Matrix Metrics": metrics_df, "Sample Counts": counts_df},
|
192
192
|
pass_fail_bool,
|
193
|
+
RawData(
|
194
|
+
confusion_matrix_reference=cm_ref,
|
195
|
+
confusion_matrix_monitoring=cm_mon,
|
196
|
+
model=model.input_id,
|
197
|
+
dataset_reference=datasets[0].input_id,
|
198
|
+
dataset_monitoring=datasets[1].input_id,
|
199
|
+
),
|
193
200
|
)
|
@@ -8,7 +8,7 @@ import numpy as np
|
|
8
8
|
import plotly.graph_objects as go
|
9
9
|
from plotly.subplots import make_subplots
|
10
10
|
|
11
|
-
from validmind import tags, tasks
|
11
|
+
from validmind import RawData, tags, tasks
|
12
12
|
from validmind.vm_models import VMDataset, VMModel
|
13
13
|
|
14
14
|
|
@@ -83,6 +83,7 @@ def CumulativePredictionProbabilitiesDrift(
|
|
83
83
|
diff_color = "rgba(148, 103, 189, 0.8)" # Purple with 0.8 opacity
|
84
84
|
|
85
85
|
figures = []
|
86
|
+
raw_data = {}
|
86
87
|
for class_value in classes:
|
87
88
|
# Create figure with secondary y-axis
|
88
89
|
fig = make_subplots(
|
@@ -175,4 +176,19 @@ def CumulativePredictionProbabilitiesDrift(
|
|
175
176
|
|
176
177
|
figures.append(fig)
|
177
178
|
|
178
|
-
|
179
|
+
# Store raw data for current class
|
180
|
+
raw_data[f"class_{class_value}_ref_probs"] = ref_probs
|
181
|
+
raw_data[f"class_{class_value}_mon_probs"] = mon_probs
|
182
|
+
raw_data[f"class_{class_value}_ref_sorted"] = ref_sorted
|
183
|
+
raw_data[f"class_{class_value}_ref_cumsum"] = ref_cumsum
|
184
|
+
raw_data[f"class_{class_value}_mon_sorted"] = mon_sorted
|
185
|
+
raw_data[f"class_{class_value}_mon_cumsum"] = mon_cumsum
|
186
|
+
|
187
|
+
return tuple(figures) + (
|
188
|
+
RawData(
|
189
|
+
model=model.input_id,
|
190
|
+
dataset_reference=datasets[0].input_id,
|
191
|
+
dataset_monitoring=datasets[1].input_id,
|
192
|
+
**raw_data,
|
193
|
+
),
|
194
|
+
)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
def calculate_psi_score(actual, expected):
|
@@ -183,4 +183,11 @@ def FeatureDrift(
|
|
183
183
|
# Calculate overall pass/fail
|
184
184
|
pass_fail_bool = (psi_df["Pass/Fail"] == "Pass").all()
|
185
185
|
|
186
|
-
|
186
|
+
# Prepare raw data
|
187
|
+
raw_data = RawData(
|
188
|
+
distributions=distributions,
|
189
|
+
dataset_reference=datasets[0].input_id,
|
190
|
+
dataset_monitoring=datasets[1].input_id,
|
191
|
+
)
|
192
|
+
|
193
|
+
return ({"PSI Scores": psi_df}, *figures, pass_fail_bool, raw_data)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
import matplotlib.pyplot as plt
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tags("visualization")
|
@@ -74,4 +74,10 @@ def PredictionAcrossEachFeature(datasets, model):
|
|
74
74
|
figures_to_save.append(fig)
|
75
75
|
plt.close()
|
76
76
|
|
77
|
-
return tuple(figures_to_save)
|
77
|
+
return tuple(figures_to_save), RawData(
|
78
|
+
y_prob_reference=y_prob_reference,
|
79
|
+
y_prob_monitoring=y_prob_monitoring,
|
80
|
+
model=model.input_id,
|
81
|
+
dataset_reference=datasets[0].input_id,
|
82
|
+
dataset_monitoring=datasets[1].input_id,
|
83
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tags("visualization")
|
@@ -140,4 +140,15 @@ def PredictionCorrelation(datasets, model, drift_pct_threshold=20):
|
|
140
140
|
# Calculate overall pass/fail
|
141
141
|
pass_fail_bool = (corr_final["Pass/Fail"] == "Pass").all()
|
142
142
|
|
143
|
-
return (
|
143
|
+
return (
|
144
|
+
{"Correlation Pair Table": corr_final},
|
145
|
+
fig,
|
146
|
+
pass_fail_bool,
|
147
|
+
RawData(
|
148
|
+
reference_correlations=corr_ref.to_dict(),
|
149
|
+
monitoring_correlations=corr_mon.to_dict(),
|
150
|
+
model=model.input_id,
|
151
|
+
dataset_reference=datasets[0].input_id,
|
152
|
+
dataset_monitoring=datasets[1].input_id,
|
153
|
+
),
|
154
|
+
)
|
@@ -10,7 +10,7 @@ import plotly.graph_objects as go
|
|
10
10
|
from plotly.subplots import make_subplots
|
11
11
|
from scipy import stats
|
12
12
|
|
13
|
-
from validmind import tags, tasks
|
13
|
+
from validmind import RawData, tags, tasks
|
14
14
|
from validmind.vm_models import VMDataset, VMModel
|
15
15
|
|
16
16
|
|
@@ -201,4 +201,15 @@ def PredictionProbabilitiesHistogramDrift(
|
|
201
201
|
}
|
202
202
|
)
|
203
203
|
|
204
|
-
return
|
204
|
+
return (
|
205
|
+
fig,
|
206
|
+
tables,
|
207
|
+
all_passed,
|
208
|
+
RawData(
|
209
|
+
reference_probabilities=y_prob_ref,
|
210
|
+
monitoring_probabilities=y_prob_mon,
|
211
|
+
model=model.input_id,
|
212
|
+
dataset_reference=datasets[0].input_id,
|
213
|
+
dataset_monitoring=datasets[1].input_id,
|
214
|
+
),
|
215
|
+
)
|