validmind 2.8.10__py3-none-any.whl → 2.8.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. validmind/__init__.py +6 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +17 -11
  4. validmind/ai/utils.py +2 -2
  5. validmind/api_client.py +75 -32
  6. validmind/client.py +108 -100
  7. validmind/client_config.py +3 -3
  8. validmind/datasets/classification/__init__.py +7 -3
  9. validmind/datasets/credit_risk/lending_club.py +28 -16
  10. validmind/datasets/nlp/cnn_dailymail.py +10 -4
  11. validmind/datasets/regression/__init__.py +22 -5
  12. validmind/errors.py +17 -7
  13. validmind/input_registry.py +1 -1
  14. validmind/logging.py +44 -35
  15. validmind/models/foundation.py +2 -2
  16. validmind/models/function.py +10 -3
  17. validmind/template.py +30 -22
  18. validmind/test_suites/__init__.py +2 -2
  19. validmind/tests/_store.py +13 -4
  20. validmind/tests/comparison.py +65 -33
  21. validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
  22. validmind/tests/data_validation/AutoMA.py +1 -1
  23. validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
  24. validmind/tests/data_validation/BoxPierce.py +3 -1
  25. validmind/tests/data_validation/ClassImbalance.py +4 -2
  26. validmind/tests/data_validation/DatasetDescription.py +3 -24
  27. validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
  28. validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
  29. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
  30. validmind/tests/data_validation/HighCardinality.py +5 -1
  31. validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
  32. validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
  33. validmind/tests/data_validation/IQROutliersTable.py +5 -2
  34. validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
  35. validmind/tests/data_validation/JarqueBera.py +2 -2
  36. validmind/tests/data_validation/LJungBox.py +2 -2
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  38. validmind/tests/data_validation/MissingValues.py +14 -10
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  40. validmind/tests/data_validation/MutualInformation.py +2 -1
  41. validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
  42. validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
  43. validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
  44. validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
  45. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
  46. validmind/tests/data_validation/RollingStatsPlot.py +2 -1
  47. validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
  48. validmind/tests/data_validation/SeasonalDecompose.py +1 -1
  49. validmind/tests/data_validation/ShapiroWilk.py +2 -2
  50. validmind/tests/data_validation/Skewness.py +7 -6
  51. validmind/tests/data_validation/SpreadPlot.py +1 -1
  52. validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
  53. validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
  54. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
  57. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  58. validmind/tests/data_validation/WOEBinTable.py +1 -1
  59. validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
  60. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  61. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  62. validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
  63. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  64. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
  65. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  66. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  67. validmind/tests/data_validation/nlp/TextDescription.py +1 -1
  68. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  69. validmind/tests/decorator.py +14 -11
  70. validmind/tests/load.py +38 -24
  71. validmind/tests/model_validation/BertScore.py +7 -1
  72. validmind/tests/model_validation/BleuScore.py +7 -1
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
  74. validmind/tests/model_validation/ContextualRecall.py +9 -1
  75. validmind/tests/model_validation/FeaturesAUC.py +1 -1
  76. validmind/tests/model_validation/MeteorScore.py +7 -1
  77. validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
  78. validmind/tests/model_validation/RegardScore.py +6 -1
  79. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
  80. validmind/tests/model_validation/RougeScore.py +3 -1
  81. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
  82. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
  83. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
  84. validmind/tests/model_validation/TokenDisparity.py +5 -1
  85. validmind/tests/model_validation/ToxicityScore.py +2 -0
  86. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  87. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
  88. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
  89. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
  90. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
  91. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
  92. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
  93. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
  94. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
  95. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
  96. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  97. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
  98. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  99. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
  100. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -3
  101. validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
  102. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -3
  103. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -3
  104. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -3
  105. validmind/tests/model_validation/ragas/ContextRecall.py +5 -3
  106. validmind/tests/model_validation/ragas/Faithfulness.py +5 -3
  107. validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
  108. validmind/tests/model_validation/ragas/ResponseRelevancy.py +5 -3
  109. validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -3
  110. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
  111. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
  112. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
  113. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +28 -5
  114. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  115. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
  116. validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
  117. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
  118. validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
  119. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
  120. validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
  121. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
  122. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
  123. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
  124. validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
  125. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
  126. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -3
  127. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
  128. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
  129. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
  130. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  131. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
  132. validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
  133. validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
  134. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
  135. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +34 -26
  136. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
  137. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
  138. validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
  139. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
  140. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
  141. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  142. validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
  143. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
  144. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  145. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
  146. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
  147. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
  148. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
  149. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
  150. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
  151. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  152. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
  153. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
  154. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
  155. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
  156. validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
  157. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
  158. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
  159. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
  160. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
  161. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
  162. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
  163. validmind/tests/output.py +66 -11
  164. validmind/tests/prompt_validation/Clarity.py +1 -1
  165. validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
  166. validmind/tests/prompt_validation/Robustness.py +6 -1
  167. validmind/tests/prompt_validation/Specificity.py +1 -1
  168. validmind/tests/run.py +28 -14
  169. validmind/tests/test_providers.py +28 -35
  170. validmind/tests/utils.py +17 -4
  171. validmind/unit_metrics/__init__.py +1 -1
  172. validmind/utils.py +295 -31
  173. validmind/vm_models/dataset/dataset.py +19 -16
  174. validmind/vm_models/dataset/utils.py +5 -3
  175. validmind/vm_models/figure.py +6 -6
  176. validmind/vm_models/input.py +6 -5
  177. validmind/vm_models/model.py +5 -5
  178. validmind/vm_models/result/result.py +122 -43
  179. validmind/vm_models/result/utils.py +9 -28
  180. validmind/vm_models/test_suite/__init__.py +5 -0
  181. validmind/vm_models/test_suite/runner.py +5 -5
  182. validmind/vm_models/test_suite/summary.py +20 -2
  183. validmind/vm_models/test_suite/test.py +6 -6
  184. validmind/vm_models/test_suite/test_suite.py +10 -10
  185. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/METADATA +4 -5
  186. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/RECORD +189 -188
  187. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
  188. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
  189. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py

@@ -6,7 +6,7 @@ import numpy as np
 from sklearn.metrics import roc_auc_score
 from sklearn.preprocessing import LabelBinarizer
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -62,19 +62,34 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
         lb = LabelBinarizer()
         lb.fit(y_true)
 
+        y_true_binary = lb.transform(y_true)
+        y_score_binary = lb.transform(dataset.y_pred(model))
+
         roc_auc = roc_auc_score(
-            y_true=lb.transform(y_true),
-            y_score=lb.transform(dataset.y_pred(model)),
+            y_true=y_true_binary,
+            y_score=y_score_binary,
             average="macro",
         )
 
     else:
-        roc_auc = roc_auc_score(y_true=y_true, y_score=dataset.y_prob(model))
+        y_score_prob = dataset.y_prob(model)
+        roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
 
-    return [
+    results = [
         {
             "Score": roc_auc,
             "Threshold": min_threshold,
             "Pass/Fail": "Pass" if roc_auc > min_threshold else "Fail",
         }
-    ], roc_auc > min_threshold
+    ]
+
+    return (
+        results,
+        roc_auc > min_threshold,
+        RawData(
+            y_true=y_true,
+            roc_auc=roc_auc,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
+    )
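
Note on the recurring pattern: nearly every hunk in this release adds model=model.input_id and dataset=dataset.input_id to the RawData payload, so post-processing can trace a result back to the exact inputs that produced it. As a minimal sketch of the multiclass branch above (synthetic labels, not taken from the package):

    # Binarize multiclass labels, then compute a macro-averaged ROC AUC,
    # mirroring the LabelBinarizer branch in the hunk above.
    import numpy as np
    from sklearn.metrics import roc_auc_score
    from sklearn.preprocessing import LabelBinarizer

    y_true = np.array([0, 1, 2, 2, 1, 0])
    y_pred = np.array([0, 1, 2, 1, 1, 0])  # one predicted label per sample

    lb = LabelBinarizer()
    lb.fit(y_true)
    roc_auc = roc_auc_score(
        y_true=lb.transform(y_true),
        y_score=lb.transform(y_pred),
        average="macro",
    )
    print(f"macro ROC AUC: {roc_auc:.3f}, passed: {roc_auc > 0.5}")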

validmind/tests/model_validation/sklearn/OverfitDiagnosis.py

@@ -10,7 +10,7 @@ import pandas as pd
 import seaborn as sns
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -73,6 +73,7 @@ def _prepare_results(
         columns={"shape": "training records", f"{metric}": f"training {metric}"},
         inplace=True,
     )
+    results["test records"] = results_test["shape"]
    results[f"test {metric}"] = results_test[metric]
 
     # Adjust gap calculation based on metric directionality
@@ -292,11 +293,18 @@ def OverfitDiagnosis(
             {
                 "Feature": feature_column,
                 "Slice": row["slice"],
-                "Number of Records": row["training records"],
+                "Number of Training Records": row["training records"],
+                "Number of Test Records": row["test records"],
                 f"Training {metric.upper()}": row[f"training {metric}"],
                 f"Test {metric.upper()}": row[f"test {metric}"],
                 "Gap": row["gap"],
             }
         )
 
-    return ({"Overfit Diagnosis": test_results}, *figures)
+    return (
+        {"Overfit Diagnosis": test_results},
+        *figures,
+        RawData(
+            model=model.input_id, datasets=[dataset.input_id for dataset in datasets]
+        ),
+    )
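
The _prepare_results hunk sits next to a comment about adjusting the gap "based on metric directionality". A hedged sketch of what such logic looks like (the helper name and metric sets here are illustrative assumptions, not the package's code):

    # For higher-is-better metrics (accuracy, AUC) overfitting shows as
    # train > test; for error metrics (MSE, MAE) it shows as test > train.
    HIGHER_IS_BETTER = {"accuracy", "auc", "f1"}

    def performance_gap(train_score: float, test_score: float, metric: str) -> float:
        if metric in HIGHER_IS_BETTER:
            return train_score - test_score
        return test_score - train_score

    print(performance_gap(0.95, 0.80, "accuracy"))  # 0.15 -> possible overfit
    print(performance_gap(0.10, 0.25, "mse"))       # 0.15 -> possible overfit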

validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py

@@ -111,4 +111,8 @@ def PermutationFeatureImportance(
         height=figure_height,
     )
 
-    return fig, RawData(permutation_importance=pfi_values)
+    return fig, RawData(
+        permutation_importance=pfi_values,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py

@@ -209,5 +209,9 @@ def PopulationStabilityIndex(
             ],
         },
         fig,
-        RawData(psi_raw=psi_results),
+        RawData(
+            psi_raw=psi_results,
+            model=model.input_id,
+            datasets=[datasets[0].input_id, datasets[1].input_id],
+        ),
     )
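
For context, PSI is conventionally computed per bin as (actual% - expected%) * ln(actual% / expected%) and summed over bins. The sketch below shows that textbook formula; it is not lifted from the package source:

    import numpy as np

    def psi(expected: np.ndarray, actual: np.ndarray, bins: int = 10) -> float:
        edges = np.histogram_bin_edges(expected, bins=bins)
        e_pct = np.histogram(expected, bins=edges)[0] / len(expected)
        a_pct = np.histogram(actual, bins=edges)[0] / len(actual)
        e_pct = np.clip(e_pct, 1e-6, None)  # guard against log(0) in empty bins
        a_pct = np.clip(a_pct, 1e-6, None)
        return float(np.sum((a_pct - e_pct) * np.log(a_pct / e_pct)))

    rng = np.random.default_rng(0)
    print(psi(rng.normal(0, 1, 5000), rng.normal(0.2, 1, 5000)))  # small drift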

validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py

@@ -83,4 +83,9 @@ def PrecisionRecallCurve(model: VMModel, dataset: VMDataset):
         ),
     )
 
-    return fig, RawData(precision=precision, recall=recall)
+    return fig, RawData(
+        precision=precision,
+        recall=recall,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/ROCCurve.py

@@ -103,5 +103,7 @@ def ROCCurve(model: VMModel, dataset: VMDataset):
                 height=500,
             ),
         ),
-        RawData(fpr=fpr, tpr=tpr, auc=auc),
+        RawData(
+            fpr=fpr, tpr=tpr, auc=auc, model=model.input_id, dataset=dataset.input_id
+        ),
     )

validmind/tests/model_validation/sklearn/RegressionErrors.py

@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("sklearn", "model_performance")
@@ -62,7 +62,11 @@ def RegressionErrors(model, dataset):
     y_pred = dataset.y_pred(model)
     y_true = y_true.astype(y_pred.dtype)
 
-    return _regression_errors(y_true, y_pred)
+    results_df = _regression_errors(y_true, y_pred)
+
+    return results_df, RawData(
+        y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
+    )
 
 
 def _regression_errors(y_true, y_pred):

validmind/tests/model_validation/sklearn/RegressionPerformance.py

@@ -5,7 +5,7 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error, mean_squared_error
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -74,10 +74,15 @@ def RegressionPerformance(model: VMModel, dataset: VMDataset):
     # MBD calculation
     metrics["Mean Bias Deviation (MBD)"] = np.mean(y_pred - y_true)
 
-    return [
-        {
-            "Metric": metric,
-            "Value": value,
-        }
-        for metric, value in metrics.items()
-    ]
+    return (
+        [
+            {
+                "Metric": metric,
+                "Value": value,
+            }
+            for metric, value in metrics.items()
+        ],
+        RawData(
+            y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
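
Mean Bias Deviation, computed in this hunk as np.mean(y_pred - y_true), is just the signed mean error: positive means the model over-predicts on average, negative means it under-predicts. A tiny illustration with made-up numbers:

    import numpy as np
    from sklearn.metrics import mean_absolute_error

    y_true = np.array([10.0, 12.0, 15.0, 20.0])
    y_pred = np.array([11.0, 12.5, 14.0, 22.0])

    print("MAE:", mean_absolute_error(y_true, y_pred))  # 1.125
    print("MBD:", np.mean(y_pred - y_true))             # 0.625 -> over-predicts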

validmind/tests/model_validation/sklearn/RegressionR2Square.py

@@ -5,7 +5,7 @@
 import pandas as pd
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
 
 
@@ -55,11 +55,14 @@ def RegressionR2Square(dataset, model):
     y_pred = dataset.y_pred(model)
     y_true = y_true.astype(y_pred.dtype)
 
+    r2 = metrics.r2_score(y_true, y_pred)
+    adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
+
     return pd.DataFrame(
         {
-            "R-squared (R2) Score": [metrics.r2_score(y_true, y_pred)],
-            "Adjusted R-squared (R2) Score": [
-                adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
-            ],
+            "R-squared (R2) Score": [r2],
+            "Adjusted R-squared (R2) Score": [adj_r2],
         }
+    ), RawData(
+        y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
     )
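
adj_r2_score takes the sample and feature counts because adjusted R² penalizes model complexity. The statsutils implementation is not shown in this diff; the standard formula it presumably follows is:

    from sklearn.metrics import r2_score

    def adjusted_r2(y_true, y_pred, n_samples: int, n_features: int) -> float:
        # 1 - (1 - R^2) * (n - 1) / (n - p - 1)
        r2 = r2_score(y_true, y_pred)
        return 1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)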

validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py

@@ -327,4 +327,8 @@ def RobustnessDiagnosis(
         results_df["Perturbation Size"] == 0.0, "Perturbation Size"
     ] = "Baseline (0.0)"
 
-    return results_df, fig, all(results_df["Passed"])
+    return (
+        results_df,
+        fig,
+        all(results_df["Passed"]),
+    )

validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py

@@ -3,10 +3,12 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, List, Optional, Union
 from warnings import filters as _warnings_filters
 
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
 import shap
 
 from validmind import RawData, tags, tasks
@@ -18,7 +20,10 @@ from validmind.vm_models import VMDataset, VMModel
 logger = get_logger(__name__)
 
 
-def select_shap_values(shap_values, class_of_interest):
+def select_shap_values(
+    shap_values: Union[np.ndarray, List[np.ndarray]],
+    class_of_interest: Optional[int] = None,
+) -> np.ndarray:
     """Selects SHAP values for binary or multiclass classification.
 
     For regression models, returns the SHAP values directly as there are no classes.
@@ -41,32 +46,30 @@ def select_shap_values(shap_values, class_of_interest):
     """
     if not isinstance(shap_values, list):
         # For regression, return the SHAP values as they are
-        # TODO: shap_values is always an array of all predictions, how is the if above supposed to work?
-        # logger.info("Returning SHAP values as-is.")
-        return shap_values
-
-    num_classes = len(shap_values)
-
-    # Default to class 1 for binary classification where no class is specified
-    if num_classes == 2 and class_of_interest is None:
-        logger.debug("Using SHAP values for class 1 (positive class).")
-        return shap_values[1]
+        selected_values = shap_values
+    else:
+        num_classes = len(shap_values)
+        # Default to class 1 for binary classification where no class is specified
+        if num_classes == 2 and class_of_interest is None:
+            selected_values = shap_values[1]
+        # Otherwise, use the specified class_of_interest
+        elif class_of_interest is not None and 0 <= class_of_interest < num_classes:
+            selected_values = shap_values[class_of_interest]
+        else:
+            raise ValueError(
+                f"Invalid class_of_interest: {class_of_interest}. Must be between 0 and {num_classes - 1}."
+            )
 
-    # Otherwise, use the specified class_of_interest
-    if (
-        class_of_interest is None
-        or class_of_interest < 0
-        or class_of_interest >= num_classes
-    ):
-        raise ValueError(
-            f"Invalid class_of_interest: {class_of_interest}. Must be between 0 and {num_classes - 1}."
-        )
+    # Add type conversion here to ensure proper float array
+    if hasattr(selected_values, "dtype"):
+        selected_values = np.array(selected_values, dtype=np.float64)
 
-    logger.debug(f"Using SHAP values for class {class_of_interest}.")
-    return shap_values[class_of_interest]
+    return selected_values
 
 
-def generate_shap_plot(type_, shap_values, x_test):
+def generate_shap_plot(
+    type_: str, shap_values: np.ndarray, x_test: Union[np.ndarray, pd.DataFrame]
+) -> plt.Figure:
     """Plots two types of SHAP global importance (SHAP).
 
     Args:
@@ -117,8 +120,8 @@ def SHAPGlobalImportance(
     dataset: VMDataset,
     kernel_explainer_samples: int = 10,
     tree_or_linear_explainer_samples: int = 200,
-    class_of_interest: int = None,
-):
+    class_of_interest: Optional[int] = None,
+) -> Dict[str, Union[plt.Figure, Dict[str, float]]]:
     """
     Evaluates and visualizes global feature importance using SHAP values for model explanation and risk identification.
@@ -229,5 +232,10 @@
     return (
         generate_shap_plot("mean", shap_values, shap_sample),
         generate_shap_plot("summary", shap_values, shap_sample),
-        RawData(shap_values=shap_values, shap_sample=shap_sample),
+        RawData(
+            shap_values=shap_values,
+            shap_sample=shap_sample,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )
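
The refactor funnels every branch of select_shap_values through a single return and coerces the result to float64. A quick usage sketch with dummy arrays (this assumes the function is imported from the module, and that SHAP returned the older list-per-class format):

    import numpy as np

    # Binary classification: a list of one (n_samples, n_features) array per class
    per_class = [np.zeros((4, 3)), np.ones((4, 3))]

    values = select_shap_values(per_class)                       # defaults to class 1
    assert values.dtype == np.float64 and values.shape == (4, 3)

    values = select_shap_values(per_class, class_of_interest=0)  # explicit class
    # select_shap_values(per_class, class_of_interest=5)         # would raise ValueError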

validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py

@@ -5,7 +5,7 @@
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -128,4 +128,12 @@ def ScoreProbabilityAlignment(
         height=600,
     )
 
-    return results_df, fig
+    # Include raw data for post-processing
+    raw_data = RawData(
+        score_bins=df[["score_bin", score_column]],
+        predicted_probabilities=df["probability"],
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
+
+    return results_df, fig, raw_data

validmind/tests/model_validation/sklearn/SilhouettePlot.py

@@ -110,5 +110,9 @@ def SilhouettePlot(model: VMModel, dataset: VMDataset):
             "Silhouette Score": silhouette_avg,
         },
         fig,
-        RawData(sample_silhouette_values=sample_silhouette_values),
+        RawData(
+            sample_silhouette_values=sample_silhouette_values,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )

validmind/tests/model_validation/sklearn/VMeasure.py

@@ -4,7 +4,7 @@
 
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -48,11 +48,14 @@ def VMeasure(dataset: VMDataset, model: VMModel):
     the other. The V Measure Score does not provide flexibility in assigning different weights to homogeneity and
     completeness.
     """
-    return [
-        {
-            "V Measure": metrics.v_measure_score(
-                labels_true=dataset.y,
-                labels_pred=dataset.y_pred(model),
-            )
-        }
-    ]
+    v_measure = metrics.v_measure_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+
+    return (
+        [{"V Measure": v_measure}],
+        RawData(
+            v_measure_score=v_measure, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
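
Since V-measure is the harmonic mean of homogeneity and completeness, the single score this test now stores in RawData can be decomposed with the same sklearn module:

    from sklearn import metrics

    labels_true = [0, 0, 1, 1, 2, 2]
    labels_pred = [0, 0, 1, 2, 2, 2]

    h = metrics.homogeneity_score(labels_true, labels_pred)
    c = metrics.completeness_score(labels_true, labels_pred)
    v = metrics.v_measure_score(labels_true, labels_pred)
    assert abs(v - 2 * h * c / (h + c)) < 1e-12  # harmonic-mean identity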

validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py

@@ -47,7 +47,7 @@ def _compute_metrics(
         None: The computed metrics are appended to the `results` dictionary in-place.
     """
     results["Slice"].append(str(region))
-    results["Shape"].append(df_region.shape[0])
+    results["Number of Records"].append(df_region.shape[0])
     results["Feature"].append(feature_column)
 
     # Check if df_region is an empty dataframe and if so, append 0 to all metrics
@@ -222,7 +222,7 @@ def WeakspotsDiagnosis(
     thresholds = thresholds or DEFAULT_THRESHOLDS
     thresholds = {k.title(): v for k, v in thresholds.items()}
 
-    results_headers = ["Slice", "Shape", "Feature"]
+    results_headers = ["Slice", "Number of Records", "Feature"]
     results_headers.extend(metrics.keys())
 
     figures = []
@@ -236,19 +236,20 @@ def WeakspotsDiagnosis(
         feature_columns
         + [datasets[1].target_column, datasets[1].prediction_column(model)]
     ]
-
+    results_1 = pd.DataFrame()
+    results_2 = pd.DataFrame()
     for feature in feature_columns:
         bins = 10
         if feature in datasets[0].feature_columns_categorical:
             bins = len(df_1[feature].unique())
         df_1["bin"] = pd.cut(df_1[feature], bins=bins)
 
-        results_1 = {k: [] for k in results_headers}
-        results_2 = {k: [] for k in results_headers}
+        r1 = {k: [] for k in results_headers}
+        r2 = {k: [] for k in results_headers}
 
         for region, df_region in df_1.groupby("bin"):
             _compute_metrics(
-                results=results_1,
+                results=r1,
                 metrics=metrics,
                 region=region,
                 df_region=df_region,
@@ -260,7 +261,7 @@ def WeakspotsDiagnosis(
                 (df_2[feature] > region.left) & (df_2[feature] <= region.right)
             ]
             _compute_metrics(
-                results=results_2,
+                results=r2,
                 metrics=metrics,
                 region=region,
                 df_region=df_2_region,
@@ -271,8 +272,8 @@ def WeakspotsDiagnosis(
 
         for metric in metrics.keys():
             fig, df = _plot_weak_spots(
-                results_1=results_1,
-                results_2=results_2,
+                results_1=r1,
+                results_2=r2,
                 feature_column=feature,
                 metric=metric,
                 threshold=thresholds[metric],
@@ -284,6 +285,8 @@ def WeakspotsDiagnosis(
             # rely on visual assessment for this test for now.
             if not df[df[list(thresholds.keys())].lt(thresholds).any(axis=1)].empty:
                 passed = False
+        results_1 = pd.concat([results_1, pd.DataFrame(r1)])
+        results_2 = pd.concat([results_2, pd.DataFrame(r2)])
 
     return (
         pd.concat(
@@ -291,7 +294,9 @@ def WeakspotsDiagnosis(
                 pd.DataFrame(results_1).assign(Dataset=datasets[0].input_id),
                 pd.DataFrame(results_2).assign(Dataset=datasets[1].input_id),
             ]
-        ).sort_values(["Feature", "Dataset"]),
+        )
+        .reset_index(drop=True)
+        .sort_values(["Feature", "Dataset"]),
         *figures,
         passed,
     )
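
The mechanic behind these hunks is binning each feature with pd.cut, computing per-slice metrics, and flagging slices that fall below a threshold. A compact, self-contained sketch of that pattern (column names and the 0.5 threshold are illustrative):

    import numpy as np
    import pandas as pd
    from sklearn.metrics import accuracy_score

    rng = np.random.default_rng(0)
    df = pd.DataFrame({
        "age": rng.uniform(18, 80, 500),
        "y_true": rng.integers(0, 2, 500),
        "y_pred": rng.integers(0, 2, 500),
    })

    df["bin"] = pd.cut(df["age"], bins=10)
    per_slice = {
        str(region): accuracy_score(g["y_true"], g["y_pred"])
        for region, g in df.groupby("bin", observed=True)
        if len(g)
    }
    weak = {k: v for k, v in per_slice.items() if v < 0.5}  # below threshold
    print(weak)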

validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py

@@ -64,7 +64,11 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil
 
     fig, fig_data = _plot_cumulative_prob(df, dataset.target_column, title)
 
-    return fig, RawData(cumulative_probabilities=fig_data)
+    return fig, RawData(
+        cumulative_probabilities=fig_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
 
 
 def _plot_cumulative_prob(df, target_col, title):

validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py

@@ -5,7 +5,7 @@
 import pandas as pd
 from statsmodels.stats.stattools import durbin_watson
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tasks("regression")
@@ -81,4 +81,9 @@ def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]):
             "threshold": [str(threshold)],
             "autocorrelation": [get_autocorrelation(dw_statistic, threshold)],
         }
+    ), RawData(
+        residuals=residuals,
+        dw_statistic=dw_statistic,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )
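
The Durbin-Watson statistic lies in [0, 4], with values near 2 meaning no first-order autocorrelation in the residuals; that is what the default [1.5, 2.5] band encodes. A short illustration:

    import numpy as np
    from statsmodels.stats.stattools import durbin_watson

    rng = np.random.default_rng(1)
    white_noise = rng.normal(size=200)   # independent residuals
    print(durbin_watson(white_noise))    # ~2.0 -> inside [1.5, 2.5]

    trending = np.cumsum(white_noise)    # strongly autocorrelated residuals
    print(durbin_watson(trending))       # near 0 -> positive autocorrelation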

validmind/tests/model_validation/statsmodels/GINITable.py

@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import roc_auc_score, roc_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_performance")
@@ -76,4 +76,11 @@ def GINITable(dataset, model):
             "GINI": [gini],
             "KS": [max(tpr - fpr)],
         }
+    ), RawData(
+        fpr=fpr,
+        tpr=tpr,
+        y_true=y_true,
+        y_prob=y_prob,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )
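
GINI and KS both fall out of the same ROC geometry captured in the new RawData fields: GINI = 2*AUC - 1, and KS is the maximum vertical gap between the TPR and FPR curves. A self-contained check with synthetic scores:

    import numpy as np
    from sklearn.metrics import roc_auc_score, roc_curve

    rng = np.random.default_rng(7)
    y_true = rng.integers(0, 2, 1000)
    y_prob = np.clip(y_true * 0.3 + rng.uniform(0.0, 0.7, 1000), 0, 1)

    auc = roc_auc_score(y_true, y_prob)
    fpr, tpr, _ = roc_curve(y_true, y_prob)
    print(f"AUC={auc:.3f}  GINI={2 * auc - 1:.3f}  KS={max(tpr - fpr):.3f}")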

validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py

@@ -4,7 +4,7 @@
 
 from statsmodels.stats.diagnostic import kstest_normal
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import InvalidTestParametersError
 from validmind.vm_models import VMDataset, VMModel
 
@@ -66,4 +66,4 @@ def KolmogorovSmirnov(model: VMModel, dataset: VMDataset, dist: str = "norm"):
             "P-Value": result["pvalue"],
         }
         for k, result in ks_values.items()
-    ]
+    ], RawData(ks_values=ks_values, dataset=dataset.input_id)
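
kstest_normal is statsmodels' Lilliefors-style goodness-of-fit test; it returns a (statistic, p-value) pair for a normality (dist="norm") or exponentiality (dist="exp") null. Briefly:

    import numpy as np
    from statsmodels.stats.diagnostic import kstest_normal

    rng = np.random.default_rng(3)
    stat, pvalue = kstest_normal(rng.normal(size=300), dist="norm")
    print(stat, pvalue)   # high p -> cannot reject normality

    stat, pvalue = kstest_normal(rng.exponential(size=300), dist="norm")
    print(stat, pvalue)   # low p -> reject normality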

validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py

@@ -6,7 +6,7 @@
 import plotly.graph_objects as go
 from matplotlib import cm
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "credit_risk")
@@ -60,7 +60,11 @@ def PredictionProbabilitiesHistogram(
 
     fig = _plot_prob_histogram(df, dataset.target_column, title)
 
-    return fig
+    return fig, RawData(
+        probabilities=df["probabilities"],
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
 
 
 def _plot_prob_histogram(df, target_col, title):

validmind/tests/model_validation/statsmodels/RegressionCoeffs.py

@@ -7,7 +7,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from scipy import stats
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 
 
@@ -97,4 +97,10 @@ def RegressionCoeffs(model):
         yaxis_title="Coefficients",
     )
 
-    return fig, coefficients
+    return (
+        fig,
+        coefficients,
+        RawData(
+            model=model.input_id, std_err=std_err, lower_ci=lower_ci, upper_ci=upper_ci
+        ),
+    )

validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py

@@ -90,4 +90,6 @@ def RegressionFeatureSignificance(
 
     plt.close()
 
-    return fig, RawData(coefficients=coefficients, pvalues=pvalues)
+    return fig, RawData(
+        coefficients=coefficients, pvalues=pvalues, model=model.input_id
+    )

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py

@@ -7,7 +7,7 @@ from typing import Union
 import matplotlib.pyplot as plt
 import pandas as pd
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -87,4 +87,9 @@ def RegressionModelForecastPlot(
 
     plt.close()
 
-    return fig
+    return fig, RawData(
+        observed_values=dataset.y.tolist(),
+        forecast_values=dataset.y_pred(model).tolist(),
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py

@@ -94,4 +94,6 @@ def RegressionModelForecastPlotLevels(
     return fig, RawData(
         y_transformed=dataset_y_transformed,
         y_pred_transformed=y_pred_transformed,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )

validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py

@@ -114,4 +114,6 @@ def RegressionModelSensitivityPlot(
     return fig, RawData(
         transformed_target=transformed_target,
         transformed_predictions=transformed_predictions,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )

validmind/tests/model_validation/statsmodels/RegressionModelSummary.py

@@ -4,7 +4,7 @@
 
 from sklearn.metrics import mean_squared_error, r2_score
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .statsutils import adj_r2_score
@@ -58,4 +58,6 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
             "MSE": mean_squared_error(y_true, y_pred, squared=True),
             "RMSE": mean_squared_error(y_true, y_pred, squared=False),
         }
-    ]
+    ], RawData(
+        y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
+    )

validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py

@@ -97,4 +97,6 @@ def RegressionPermutationFeatureImportance(
         height=figure_height,
     )
 
-    return fig, RawData(importances=importances)
+    return fig, RawData(
+        importances=importances, model=model.input_id, dataset=dataset.input_id
+    )