validmind 2.8.10__py3-none-any.whl → 2.8.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -5
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +17 -11
- validmind/ai/utils.py +2 -2
- validmind/api_client.py +75 -32
- validmind/client.py +108 -100
- validmind/client_config.py +3 -3
- validmind/datasets/classification/__init__.py +7 -3
- validmind/datasets/credit_risk/lending_club.py +28 -16
- validmind/datasets/nlp/cnn_dailymail.py +10 -4
- validmind/datasets/regression/__init__.py +22 -5
- validmind/errors.py +17 -7
- validmind/input_registry.py +1 -1
- validmind/logging.py +44 -35
- validmind/models/foundation.py +2 -2
- validmind/models/function.py +10 -3
- validmind/template.py +30 -22
- validmind/test_suites/__init__.py +2 -2
- validmind/tests/_store.py +13 -4
- validmind/tests/comparison.py +65 -33
- validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
- validmind/tests/data_validation/AutoMA.py +1 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
- validmind/tests/data_validation/BoxPierce.py +3 -1
- validmind/tests/data_validation/ClassImbalance.py +4 -2
- validmind/tests/data_validation/DatasetDescription.py +3 -24
- validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
- validmind/tests/data_validation/HighCardinality.py +5 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
- validmind/tests/data_validation/IQROutliersTable.py +5 -2
- validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
- validmind/tests/data_validation/JarqueBera.py +2 -2
- validmind/tests/data_validation/LJungBox.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/MissingValues.py +14 -10
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +2 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
- validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
- validmind/tests/data_validation/RollingStatsPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
- validmind/tests/data_validation/SeasonalDecompose.py +1 -1
- validmind/tests/data_validation/ShapiroWilk.py +2 -2
- validmind/tests/data_validation/Skewness.py +7 -6
- validmind/tests/data_validation/SpreadPlot.py +1 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/TextDescription.py +1 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/decorator.py +14 -11
- validmind/tests/load.py +38 -24
- validmind/tests/model_validation/BertScore.py +7 -1
- validmind/tests/model_validation/BleuScore.py +7 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
- validmind/tests/model_validation/ContextualRecall.py +9 -1
- validmind/tests/model_validation/FeaturesAUC.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +7 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
- validmind/tests/model_validation/RegardScore.py +6 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
- validmind/tests/model_validation/RougeScore.py +3 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
- validmind/tests/model_validation/TokenDisparity.py +5 -1
- validmind/tests/model_validation/ToxicityScore.py +2 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -3
- validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -3
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -3
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -3
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -3
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -3
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +5 -3
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -3
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +28 -5
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
- validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
- validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +34 -26
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
- validmind/tests/output.py +66 -11
- validmind/tests/prompt_validation/Clarity.py +1 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
- validmind/tests/prompt_validation/Robustness.py +6 -1
- validmind/tests/prompt_validation/Specificity.py +1 -1
- validmind/tests/run.py +28 -14
- validmind/tests/test_providers.py +28 -35
- validmind/tests/utils.py +17 -4
- validmind/unit_metrics/__init__.py +1 -1
- validmind/utils.py +295 -31
- validmind/vm_models/dataset/dataset.py +19 -16
- validmind/vm_models/dataset/utils.py +5 -3
- validmind/vm_models/figure.py +6 -6
- validmind/vm_models/input.py +6 -5
- validmind/vm_models/model.py +5 -5
- validmind/vm_models/result/result.py +122 -43
- validmind/vm_models/result/utils.py +9 -28
- validmind/vm_models/test_suite/__init__.py +5 -0
- validmind/vm_models/test_suite/runner.py +5 -5
- validmind/vm_models/test_suite/summary.py +20 -2
- validmind/vm_models/test_suite/test.py +6 -6
- validmind/vm_models/test_suite/test_suite.py +10 -10
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/METADATA +4 -5
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/RECORD +189 -188
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0
validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py
CHANGED

@@ -9,7 +9,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from sklearn.calibration import calibration_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel
 
@@ -217,4 +217,14 @@ def CalibrationCurveDrift(
         fig,
         {"Mean Predicted Probabilities": pred_df, "Fraction of Positives": true_df},
         pass_fail_bool,
+        RawData(
+            prob_true_ref=prob_true_ref,
+            prob_pred_ref=prob_pred_ref,
+            prob_true_mon=prob_true_mon,
+            prob_pred_mon=prob_pred_mon,
+            bin_labels=bin_labels,
+            model=model.input_id,
+            dataset_ref=datasets[0].input_id,
+            dataset_mon=datasets[1].input_id,
+        ),
     )
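The recurring change across these `ongoing_monitoring` tests is that each function now returns a `RawData` object alongside its tables, figures, and pass/fail flag, so the arrays behind each result are preserved on the `TestResult`. A minimal sketch of the same pattern for a custom monitoring test follows; the test ID and `RawData` fields here are illustrative, not taken from the package:

```python
import validmind as vm
from validmind import RawData, tags, tasks


@vm.test("my_tests.ExampleMeanDrift")  # hypothetical test ID
@tags("visualization")
@tasks("monitoring")
def ExampleMeanDrift(datasets, model):
    # Compare feature means between the reference and monitoring datasets
    ref_mean = datasets[0].df.mean(numeric_only=True)
    mon_mean = datasets[1].df.mean(numeric_only=True)
    delta = (mon_mean - ref_mean).to_frame("Mean Delta")
    passed = bool(delta["Mean Delta"].abs().lt(1.0).all())

    # RawData accepts arbitrary keyword fields and is attached to the result
    return {"Mean Drift": delta}, passed, RawData(
        ref_mean=ref_mean,
        mon_mean=mon_mean,
        model=model.input_id,
        dataset_reference=datasets[0].input_id,
        dataset_monitoring=datasets[1].input_id,
    )
```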
validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py
CHANGED

@@ -8,7 +8,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import classification_report
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -145,4 +145,12 @@ def ClassificationAccuracyDrift(
     # Calculate overall pass/fail
     pass_fail_bool = (df["Pass/Fail"] == "Pass").all()
 
-    return ({"Classification Accuracy Metrics": df}, pass_fail_bool)
+    raw_data = RawData(
+        report_reference=report_ref,
+        report_monitoring=report_mon,
+        model=model.input_id,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
+
+    return ({"Classification Accuracy Metrics": df}, pass_fail_bool, raw_data)
validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py
CHANGED

@@ -8,7 +8,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import confusion_matrix
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -190,4 +190,11 @@ def ConfusionMatrixDrift(
     return (
         {"Confusion Matrix Metrics": metrics_df, "Sample Counts": counts_df},
         pass_fail_bool,
+        RawData(
+            confusion_matrix_reference=cm_ref,
+            confusion_matrix_monitoring=cm_mon,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
     )
validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py
CHANGED

@@ -8,7 +8,7 @@ import numpy as np
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -83,6 +83,7 @@ def CumulativePredictionProbabilitiesDrift(
     diff_color = "rgba(148, 103, 189, 0.8)"  # Purple with 0.8 opacity
 
     figures = []
+    raw_data = {}
     for class_value in classes:
         # Create figure with secondary y-axis
         fig = make_subplots(
@@ -175,4 +176,19 @@ def CumulativePredictionProbabilitiesDrift(
 
         figures.append(fig)
 
-    return tuple(figures)
+        # Store raw data for current class
+        raw_data[f"class_{class_value}_ref_probs"] = ref_probs
+        raw_data[f"class_{class_value}_mon_probs"] = mon_probs
+        raw_data[f"class_{class_value}_ref_sorted"] = ref_sorted
+        raw_data[f"class_{class_value}_ref_cumsum"] = ref_cumsum
+        raw_data[f"class_{class_value}_mon_sorted"] = mon_sorted
+        raw_data[f"class_{class_value}_mon_cumsum"] = mon_cumsum
+
+    return tuple(figures) + (
+        RawData(
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+            **raw_data,
+        ),
+    )
validmind/tests/ongoing_monitoring/FeatureDrift.py
CHANGED

@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 def calculate_psi_score(actual, expected):
@@ -183,4 +183,11 @@ def FeatureDrift(
     # Calculate overall pass/fail
     pass_fail_bool = (psi_df["Pass/Fail"] == "Pass").all()
 
-    return ({"PSI Scores": psi_df}, *figures, pass_fail_bool)
+    # Prepare raw data
+    raw_data = RawData(
+        distributions=distributions,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
+
+    return ({"PSI Scores": psi_df}, *figures, pass_fail_bool, raw_data)
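`FeatureDrift` scores drift per feature with the Population Stability Index via `calculate_psi_score(actual, expected)`. PSI over a set of bins is Σᵢ (aᵢ − eᵢ) · ln(aᵢ / eᵢ), where aᵢ and eᵢ are the monitoring and reference proportions in bin i. A standalone sketch follows; the binning and zero-handling here are illustrative and may differ from the package's implementation:

```python
import numpy as np


def psi(expected: np.ndarray, actual: np.ndarray, bins: int = 10) -> float:
    # Bin edges are taken from the reference (expected) distribution
    edges = np.histogram_bin_edges(expected, bins=bins)
    e_counts, _ = np.histogram(expected, bins=edges)
    a_counts, _ = np.histogram(actual, bins=edges)
    # Convert counts to proportions, with a small floor to avoid log(0)
    e = np.clip(e_counts / e_counts.sum(), 1e-6, None)
    a = np.clip(a_counts / a_counts.sum(), 1e-6, None)
    return float(np.sum((a - e) * np.log(a / e)))


rng = np.random.default_rng(0)
# A small mean shift between reference and monitoring samples yields PSI > 0
print(psi(rng.normal(0, 1, 5000), rng.normal(0.2, 1, 5000)))
```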
validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py
CHANGED

@@ -5,7 +5,7 @@
 
 import matplotlib.pyplot as plt
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization")
@@ -74,4 +74,10 @@ def PredictionAcrossEachFeature(datasets, model):
         figures_to_save.append(fig)
         plt.close()
 
-    return tuple(figures_to_save)
+    return tuple(figures_to_save), RawData(
+        y_prob_reference=y_prob_reference,
+        y_prob_monitoring=y_prob_monitoring,
+        model=model.input_id,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
validmind/tests/ongoing_monitoring/PredictionCorrelation.py
CHANGED

@@ -5,7 +5,7 @@
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization")
@@ -140,4 +140,15 @@ def PredictionCorrelation(datasets, model, drift_pct_threshold=20):
     # Calculate overall pass/fail
     pass_fail_bool = (corr_final["Pass/Fail"] == "Pass").all()
 
-    return ({"Correlation Pair Table": corr_final}, fig, pass_fail_bool)
+    return (
+        {"Correlation Pair Table": corr_final},
+        fig,
+        pass_fail_bool,
+        RawData(
+            reference_correlations=corr_ref.to_dict(),
+            monitoring_correlations=corr_mon.to_dict(),
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py
CHANGED

@@ -10,7 +10,7 @@ import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 from scipy import stats
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -201,4 +201,15 @@ def PredictionProbabilitiesHistogramDrift(
         }
     )
 
-    return (fig, tables, all_passed)
+    return (
+        fig,
+        tables,
+        all_passed,
+        RawData(
+            reference_probabilities=y_prob_ref,
+            monitoring_probabilities=y_prob_mon,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/ongoing_monitoring/ROCCurveDrift.py
CHANGED

@@ -8,7 +8,7 @@ import numpy as np
 import plotly.graph_objects as go
 from sklearn.metrics import roc_auc_score, roc_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel
 
@@ -147,4 +147,18 @@ def ROCCurveDrift(datasets: List[VMDataset], model: VMModel):
         height=500,
     )
 
-    return (fig1, fig2)
+    return (
+        fig1,
+        fig2,
+        RawData(
+            fpr_ref=fpr_ref,
+            tpr_ref=tpr_ref,
+            auc_ref=auc_ref,
+            fpr_mon=fpr_mon,
+            tpr_mon=tpr_mon,
+            auc_mon=auc_mon,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/ongoing_monitoring/ScoreBandsDrift.py
CHANGED

@@ -7,7 +7,7 @@ from typing import List
 import numpy as np
 import pandas as pd
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -209,4 +209,13 @@ def ScoreBandsDrift(
         tables[table_name] = pd.DataFrame(rows)
         all_passed &= metric_passed
 
-    return tables, all_passed
+    # Collect raw data
+    raw_data = RawData(
+        ref_results=ref_results,
+        mon_results=mon_results,
+        model=model.input_id,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
+
+    return tables, all_passed, raw_data
validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py
CHANGED

@@ -7,7 +7,7 @@ import plotly.figure_factory as ff
 import plotly.graph_objects as go
 from scipy.stats import kurtosis, skew
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization")
@@ -142,4 +142,15 @@ def TargetPredictionDistributionPlot(datasets, model, drift_pct_threshold=20):
 
     pass_fail_bool = (moments["Pass/Fail"] == "Pass").all()
 
-    return ({"Distribution Moments": moments}, fig, pass_fail_bool)
+    return (
+        {"Distribution Moments": moments},
+        fig,
+        pass_fail_bool,
+        RawData(
+            pred_ref=pred_ref,
+            pred_monitor=pred_monitor,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
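In all of these tests the `RawData` object rides along on the returned tuple and ends up on the `TestResult`, where it can be inspected after the run. A hedged sketch, assuming the input IDs below were previously registered via `vm.init_dataset`/`vm.init_model` and that `raw_data` is exposed as shown in `RawDataOutputHandler` below:

```python
from validmind.tests import run_test

result = run_test(
    "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
    inputs={
        "datasets": ["reference_dataset", "monitoring_dataset"],  # placeholder IDs
        "model": "my_model",  # placeholder ID
    },
    generate_description=False,
)

raw = result.raw_data  # the RawData captured from the test's return value
```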
validmind/tests/output.py
CHANGED

@@ -9,6 +9,7 @@ from uuid import uuid4
 import numpy as np
 import pandas as pd
 
+from validmind.utils import is_html, md_to_html
 from validmind.vm_models.figure import (
     Figure,
     is_matplotlib_figure,
@@ -77,30 +78,72 @@ class FigureOutputHandler(OutputHandler):
 
 class TableOutputHandler(OutputHandler):
     def can_handle(self, item: Any) -> bool:
-        return isinstance(item, (list, pd.DataFrame, dict, ResultTable))
+        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, tuple))
+
+    def _convert_simple_type(self, data: Any) -> pd.DataFrame:
+        """Convert a simple data type to a DataFrame."""
+        if isinstance(data, dict):
+            return pd.DataFrame([data])
+        elif data is None:
+            return pd.DataFrame()
+        else:
+            raise ValueError(f"Cannot convert {type(data)} to DataFrame")
+
+    def _convert_list(self, data_list: List) -> pd.DataFrame:
+        """Convert a list to a DataFrame."""
+        if not data_list:
+            return pd.DataFrame()
+
+        try:
+            return pd.DataFrame(data_list)
+        except Exception as e:
+            # If conversion fails, try to handle common cases
+            if all(
+                isinstance(item, (int, float, str, bool, type(None)))
+                for item in data_list
+            ):
+                return pd.DataFrame({"Values": data_list})
+            else:
+                raise ValueError(f"Could not convert list to DataFrame: {e}")
+
+    def _convert_to_dataframe(self, table_data: Any) -> pd.DataFrame:
+        """Convert various data types to a pandas DataFrame."""
+        # Handle special cases by type
+        if isinstance(table_data, pd.DataFrame):
+            return table_data
+        elif isinstance(table_data, (dict, str, type(None))):
+            return self._convert_simple_type(table_data)
+        elif isinstance(table_data, tuple):
+            return self._convert_list(list(table_data))
+        elif isinstance(table_data, list):
+            return self._convert_list(table_data)
+        else:
+            # If we reach here, we don't know how to handle this type
+            raise ValueError(
+                f"Invalid table format: must be a list of dictionaries or a DataFrame, got {type(table_data)}"
+            )
 
     def process(
         self,
-        item: Union[List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable],
+        item: Union[
+            List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable, str, tuple
+        ],
         result: TestResult,
     ) -> None:
+        # Convert to a dictionary of tables if not already
        tables = item if isinstance(item, dict) else {"": item}
 
         for table_name, table_data in tables.items():
-            #
+            # If already a ResultTable, add it directly
             if isinstance(table_data, ResultTable):
                 result.add_table(table_data)
                 continue
 
-            if not isinstance(table_data, (list, pd.DataFrame)):
-                raise ValueError(
-                    "Invalid table format: must be a list of dictionaries or a DataFrame"
-                )
-
-            if isinstance(table_data, list):
-                table_data = pd.DataFrame(table_data)
+            # Convert the data to a DataFrame using our helper method
+            df = self._convert_to_dataframe(table_data)
 
-            result.add_table(ResultTable(data=table_data, title=table_name or None))
+            # Add the resulting DataFrame as a table to the result
+            result.add_table(ResultTable(data=df, title=table_name or None))
 
 
 class RawDataOutputHandler(OutputHandler):
@@ -111,6 +154,17 @@ class RawDataOutputHandler(OutputHandler):
         result.raw_data = item
 
 
+class StringOutputHandler(OutputHandler):
+    def can_handle(self, item: Any) -> bool:
+        return isinstance(item, str)
+
+    def process(self, item: Any, result: TestResult) -> None:
+        if not is_html(item):
+            item = md_to_html(item, mathml=True)
+
+        result.description = item
+
+
 def process_output(item: Any, result: TestResult) -> None:
     """Process a single test output item and update the TestResult."""
     handlers = [
@@ -119,6 +173,7 @@ def process_output(item: Any, result: TestResult) -> None:
         FigureOutputHandler(),
         TableOutputHandler(),
         RawDataOutputHandler(),
+        StringOutputHandler(),
     ]
 
     for handler in handlers:
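`TableOutputHandler` now coerces dicts, tuples, and lists into DataFrames instead of rejecting them, and the new `StringOutputHandler` routes any returned string into `result.description`, converting markdown to HTML when needed. A hedged sketch of a custom test that leans on both behaviors (the test ID is hypothetical):

```python
import validmind as vm


@vm.test("my_tests.OutputsDemo")  # hypothetical test ID
def OutputsDemo():
    # Picked up by StringOutputHandler: markdown is converted to HTML and
    # stored as the result description
    description = "## Summary\n\nAll checks completed."

    # Picked up by TableOutputHandler: a list of dicts becomes a DataFrame
    scores = [{"metric": "accuracy", "value": 0.91}]

    return description, {"Scores": scores}
```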
validmind/tests/prompt_validation/Robustness.py
CHANGED

@@ -130,5 +130,10 @@ def Robustness(model, dataset, num_tests=10):
     return (
         results,
         all(result["Pass/Fail"] == "Pass" for result in results),
-        RawData(generated_inputs=generated_inputs, responses=responses),
+        RawData(
+            generated_inputs=generated_inputs,
+            responses=responses,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )
validmind/tests/run.py
CHANGED

@@ -76,7 +76,7 @@ def _get_run_metadata(**metadata: Dict[str, Any]) -> Dict[str, Any]:
 
 def _get_test_kwargs(
     test_func: callable, inputs: Dict[str, Any], params: Dict[str, Any]
-):
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """Insepect function signature to build kwargs to pass the inputs and params
     that the test function expects
 
@@ -93,7 +93,7 @@ def _get_test_kwargs(
         params (dict): Test parameters e.g. {"param1": 1, "param2": 2}
 
     Returns:
-
+        Tuple[Dict[str, Any], Dict[str, Any]]: Tuple of input and param kwargs
     """
     input_kwargs = {}  # map function inputs (`dataset` etc) to actual objects
 
@@ -222,6 +222,7 @@ def _run_comparison_test(
     params: Union[Dict[str, Any], None],
     param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
     title: Optional[str] = None,
+    show_params: bool = True,
 ):
     """Run a comparison test i.e. a test that compares multiple outputs of a test across
     different input and/or param combinations"""
@@ -242,6 +243,7 @@ def _run_comparison_test(
             show=False,
             generate_description=False,
             title=title,
+            show_params=show_params,
         )
         for config in run_test_configs
     ]
@@ -253,7 +255,9 @@ def _run_comparison_test(
     else:
         test_doc = describe_test(test_id, raw=True)["Description"]
 
-    combined_outputs, combined_inputs, combined_params = combine_results(results)
+    combined_outputs, combined_inputs, combined_params = combine_results(
+        results, show_params
+    )
 
     return build_test_result(
         outputs=combined_outputs,
@@ -265,7 +269,12 @@ def _run_comparison_test(
     )
 
 
-def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
+def _run_test(
+    test_id: TestID,
+    inputs: Dict[str, Any],
+    params: Dict[str, Any],
+    title: Optional[str] = None,
+):
     """Run a standard test and return a TestResult object"""
     test_func = load_test(test_id)
     input_kwargs, param_kwargs = _get_test_kwargs(
@@ -282,6 +291,7 @@ def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
         test_doc=getdoc(test_func),
         inputs=input_kwargs,
         params=param_kwargs,
+        title=title,
     )
 
 
@@ -297,6 +307,7 @@ def run_test(  # noqa: C901
     generate_description: bool = True,
     title: Optional[str] = None,
     post_process_fn: Union[Callable[[TestResult], None], None] = None,
+    show_params: bool = True,
     **kwargs,
 ) -> TestResult:
     """Run a ValidMind or custom test
@@ -321,6 +332,7 @@ def run_test(  # noqa: C901
         generate_description (bool, optional): Whether to generate a description. Defaults to True.
         title (str, optional): Custom title for the test result
         post_process_fn (Callable[[TestResult], None], optional): Function to post-process the test result
+        show_params (bool, optional): Whether to include parameter values in figure titles for comparison tests. Defaults to True.
 
     Returns:
         TestResult: A TestResult object containing the test results
@@ -358,6 +370,7 @@ def run_test(  # noqa: C901
             input_grid=input_grid,
             params=params,
             param_grid=param_grid,
+            show_params=show_params,
         )
 
     elif unit_metrics:
@@ -375,7 +388,7 @@ def run_test(  # noqa: C901
         )
 
     else:
-        result = _run_test(test_id, inputs, params)
+        result = _run_test(test_id, inputs, params, title)
 
     end_time = time.perf_counter()
     result.metadata = _get_run_metadata(duration_seconds=end_time - start_time)
@@ -383,15 +396,16 @@ def run_test(  # noqa: C901
     if post_process_fn:
         result = post_process_fn(result)
 
-    result.description = get_result_description(
-        test_id=test_id,
-        test_description=result.doc,
-        tables=result.tables,
-        figures=result.figures,
-        metric=result.metric,
-        should_generate=generate_description,
-    )
-
+    if not result.description:
+        result.description = get_result_description(
+            test_id=test_id,
+            test_description=result.doc,
+            tables=result.tables,
+            figures=result.figures,
+            metric=result.metric,
+            should_generate=generate_description,
+            title=title,
+        )
 
     if show:
         result.show()
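The new `show_params` flag is threaded from `run_test` through `_run_comparison_test` into `combine_results`, so parameter values can be kept out of figure titles when a single test is fanned out over a `param_grid`. A hedged usage sketch (hypothetical test ID and parameter name):

```python
from validmind.tests import run_test

result = run_test(
    "my_tests.ExampleTest",  # hypothetical test ID
    inputs={"dataset": "my_dataset", "model": "my_model"},
    param_grid={"threshold": [0.3, 0.5, 0.7]},  # hypothetical parameter
    show_params=False,  # omit parameter values from comparison figure titles
    generate_description=False,
)
```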
validmind/tests/test_providers.py
CHANGED

@@ -7,7 +7,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import List, Protocol
+from typing import Any, Callable, List, Protocol
 
 from validmind.logging import get_logger
 
@@ -95,45 +95,38 @@ class LocalTestProvider:
         """
         self.root_folder = os.path.abspath(root_folder)
 
-    def list_tests(self):
+    def list_tests(self) -> List[str]:
         """List all tests in the given namespace
 
         Returns:
             list: A list of test IDs
         """
-
-
+        test_files = []
         for root, _, files in os.walk(self.root_folder):
-            for
-            if not
-                continue
-
-            path = Path(root) / filename
-            if not _is_test_file(path):
+            for file in files:
+                if not file.endswith(".py"):
                     continue
 
-
-
-
-
-
+                path = Path(os.path.join(root, file))
+                if _is_test_file(path):
+                    rel_path = os.path.relpath(path, self.root_folder)
+                    test_id = os.path.splitext(rel_path)[0].replace(os.sep, ".")
+                    test_files.append(test_id)
 
-        return
+        return test_files
 
-    def load_test(self, test_id: str):
-        """
-        Load the test identified by the given test_id.
+    def load_test(self, test_id: str) -> Callable[..., Any]:
+        """Load the test function identified by the given test_id
 
         Args:
-            test_id (str): The
-
+            test_id (str): The test ID (does not contain the namespace under which
+                the test is registered)
 
         Returns:
-            The test
+            callable: The test function
 
         Raises:
-
-            LocalTestProviderLoadTestError: If the test class cannot be found in the module
+            FileNotFoundError: If the test is not found
         """
         # Convert test_id to file path
         file_path = os.path.join(self.root_folder, f"{test_id.replace('.', '/')}.py")
@@ -162,28 +155,28 @@ class LocalTestProvider:
 
 
 class ValidMindTestProvider:
-    """
+    """Provider for built-in ValidMind tests"""
 
-    def __init__(self):
+    def __init__(self) -> None:
         # two subproviders: unit_metrics and normal tests
-        self.
+        self.unit_metrics_provider = LocalTestProvider(
             os.path.join(os.path.dirname(__file__), "..", "unit_metrics")
         )
-        self.
+        self.test_provider = LocalTestProvider(os.path.dirname(__file__))
 
     def list_tests(self) -> List[str]:
-        """List all tests in the
+        """List all tests in the given namespace"""
         metric_ids = [
-            f"unit_metrics.{test}" for test in self.
+            f"unit_metrics.{test}" for test in self.unit_metrics_provider.list_tests()
         ]
-        test_ids = self.
+        test_ids = self.test_provider.list_tests()
 
         return metric_ids + test_ids
 
-    def load_test(self, test_id: str) ->
-        """Load
+    def load_test(self, test_id: str) -> Callable[..., Any]:
+        """Load the test function identified by the given test_id"""
         return (
-            self.
+            self.unit_metrics_provider.load_test(test_id.replace("unit_metrics.", ""))
             if test_id.startswith("unit_metrics.")
-            else self.
+            else self.test_provider.load_test(test_id)
        )