validmind-2.8.10-py3-none-any.whl → validmind-2.8.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +4 -2
  3. validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
  4. validmind/tests/data_validation/AutoMA.py +1 -1
  5. validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
  6. validmind/tests/data_validation/BoxPierce.py +3 -1
  7. validmind/tests/data_validation/ClassImbalance.py +1 -1
  8. validmind/tests/data_validation/DatasetDescription.py +1 -1
  9. validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
  10. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
  11. validmind/tests/data_validation/HighCardinality.py +5 -1
  12. validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
  13. validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
  14. validmind/tests/data_validation/IQROutliersTable.py +5 -2
  15. validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
  16. validmind/tests/data_validation/JarqueBera.py +2 -2
  17. validmind/tests/data_validation/LJungBox.py +2 -2
  18. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  19. validmind/tests/data_validation/MissingValues.py +14 -10
  20. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  21. validmind/tests/data_validation/MutualInformation.py +2 -1
  22. validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
  23. validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
  24. validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
  25. validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
  26. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
  27. validmind/tests/data_validation/RollingStatsPlot.py +2 -1
  28. validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
  29. validmind/tests/data_validation/SeasonalDecompose.py +1 -1
  30. validmind/tests/data_validation/ShapiroWilk.py +2 -2
  31. validmind/tests/data_validation/SpreadPlot.py +1 -1
  32. validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
  33. validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
  34. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  35. validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
  36. validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
  37. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  38. validmind/tests/data_validation/WOEBinTable.py +1 -1
  39. validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
  40. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  41. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  42. validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
  43. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  44. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
  45. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  46. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  47. validmind/tests/data_validation/nlp/TextDescription.py +1 -1
  48. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  49. validmind/tests/model_validation/BertScore.py +7 -1
  50. validmind/tests/model_validation/BleuScore.py +7 -1
  51. validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
  52. validmind/tests/model_validation/ContextualRecall.py +9 -1
  53. validmind/tests/model_validation/FeaturesAUC.py +1 -1
  54. validmind/tests/model_validation/MeteorScore.py +7 -1
  55. validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
  56. validmind/tests/model_validation/RegardScore.py +6 -1
  57. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
  58. validmind/tests/model_validation/RougeScore.py +3 -1
  59. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
  60. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
  61. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
  62. validmind/tests/model_validation/TokenDisparity.py +5 -1
  63. validmind/tests/model_validation/ToxicityScore.py +2 -0
  64. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  65. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
  66. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
  67. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
  68. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
  69. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
  70. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
  71. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
  72. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
  73. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
  74. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  75. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
  76. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  77. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
  78. validmind/tests/model_validation/ragas/AnswerCorrectness.py +1 -1
  79. validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
  80. validmind/tests/model_validation/ragas/ContextEntityRecall.py +1 -1
  81. validmind/tests/model_validation/ragas/ContextPrecision.py +1 -1
  82. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +1 -1
  83. validmind/tests/model_validation/ragas/ContextRecall.py +1 -1
  84. validmind/tests/model_validation/ragas/Faithfulness.py +1 -1
  85. validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
  86. validmind/tests/model_validation/ragas/ResponseRelevancy.py +1 -1
  87. validmind/tests/model_validation/ragas/SemanticSimilarity.py +1 -1
  88. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
  89. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
  90. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
  91. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +15 -2
  92. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  93. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
  94. validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
  95. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
  96. validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
  97. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
  98. validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
  99. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
  100. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
  101. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
  102. validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
  103. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
  104. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +8 -2
  105. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
  106. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
  107. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
  108. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  109. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
  110. validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
  111. validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
  112. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
  113. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +6 -1
  114. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
  115. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
  116. validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
  117. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
  118. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  119. validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
  120. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
  121. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  122. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
  123. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
  124. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
  125. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
  126. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
  127. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
  128. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  129. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
  130. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
  131. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
  132. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
  133. validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
  134. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
  136. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
  137. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
  138. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
  139. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
  140. validmind/tests/prompt_validation/Clarity.py +1 -1
  141. validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
  142. validmind/tests/prompt_validation/Robustness.py +6 -1
  143. validmind/tests/prompt_validation/Specificity.py +1 -1
  144. validmind/vm_models/result/utils.py +4 -23
  145. {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/METADATA +2 -2
  146. {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/RECORD +149 -149
  147. {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/LICENSE +0 -0
  148. {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/WHEEL +0 -0
  149. {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/sklearn/ConfusionMatrix.py

@@ -19,7 +19,11 @@ from validmind.vm_models import VMDataset, VMModel
     "visualization",
 )
 @tasks("classification", "text_classification")
-def ConfusionMatrix(dataset: VMDataset, model: VMModel):
+def ConfusionMatrix(
+    dataset: VMDataset,
+    model: VMModel,
+    threshold: float = 0.5,
+):
     """
     Evaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix
     heatmap.
@@ -66,7 +70,17 @@ def ConfusionMatrix(dataset: VMDataset, model: VMModel):
     - Risks of misinterpretation exist because the matrix doesn't directly provide precision, recall, or F1-score data.
     These metrics have to be computed separately.
     """
-    y_pred = dataset.y_pred(model)
+    # Get predictions using threshold for binary classification if possible
+    if hasattr(model.model, "predict_proba"):
+        y_prob = dataset.y_prob(model)
+        # Handle both 1D and 2D probability arrays
+        if y_prob.ndim == 2:
+            y_pred = (y_prob[:, 1] > threshold).astype(int)
+        else:
+            y_pred = (y_prob > threshold).astype(int)
+    else:
+        y_pred = dataset.y_pred(model)
+
     y_true = dataset.y.astype(y_pred.dtype)
 
     labels = np.unique(y_true)
@@ -119,4 +133,9 @@ def ConfusionMatrix(dataset: VMDataset, model: VMModel):
         font=dict(size=14),
     )
 
-    return fig, RawData(confusion_matrix=cm)
+    return fig, RawData(
+        confusion_matrix=cm,
+        threshold=threshold,
+        dataset=dataset.input_id,
+        model=model.input_id,
+    )
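
The change above is the one user-facing API addition in this diff: ConfusionMatrix gains a threshold parameter (default 0.5) and, when the underlying model exposes predict_proba, derives hard predictions from the probabilities instead of taking dataset.y_pred as-is. A minimal usage sketch, assuming a dataset and model were already registered via vm.init_dataset/vm.init_model (the input IDs below are illustrative, not from this diff):

import validmind as vm

result = vm.tests.run_test(
    "validmind.model_validation.sklearn.ConfusionMatrix",
    inputs={"dataset": "test_dataset", "model": "classifier"},  # hypothetical IDs
    params={"threshold": 0.3},  # new in 2.8.12; omit to keep the 0.5 default
)
result.log()  # the logged result now includes the RawData shown above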

validmind/tests/model_validation/sklearn/FeatureImportance.py

@@ -5,7 +5,7 @@
 import pandas as pd
 from sklearn.inspection import permutation_importance
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -78,4 +78,8 @@ def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3)
         else:
             result[f"Feature {i + 1}"] = None
 
-    return pd.DataFrame([result])
+    return pd.DataFrame([result]), RawData(
+        permutation_importance=pfi_values,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py

@@ -4,7 +4,7 @@
 
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -52,11 +52,14 @@ def FowlkesMallowsScore(dataset: VMDataset, model: VMModel):
     - It does not handle mismatching numbers of clusters between the true and predicted labels. As such, it may return
     misleading results if the predicted labels suggest a different number of clusters than what is in the true labels.
     """
-    return [
-        {
-            "Fowlkes-Mallows score": metrics.fowlkes_mallows_score(
-                labels_true=dataset.y,
-                labels_pred=dataset.y_pred(model),
-            )
-        }
-    ]
+    fowlkes_mallows_score = metrics.fowlkes_mallows_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+
+    return [{"Fowlkes-Mallows score": fowlkes_mallows_score}], RawData(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
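
The FowlkesMallowsScore change above, and the HomogeneityScore and VMeasure changes below, all follow this release's recurring pattern: compute the metric once, return the summary table as before, and attach a RawData object carrying the underlying values plus model.input_id and dataset.input_id so results stay traceable to their inputs. A sketch of a custom test written in the same style (the test name and metric here are hypothetical, not part of the package):

from sklearn.metrics import balanced_accuracy_score

from validmind import RawData, tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("sklearn", "model_performance")
@tasks("classification")
def BalancedAccuracy(dataset: VMDataset, model: VMModel):
    """Hypothetical test illustrating the 2.8.12 return convention."""
    score = balanced_accuracy_score(dataset.y, dataset.y_pred(model))

    # Summary table for display, RawData tagged with input IDs for traceability
    return [{"Balanced Accuracy": score}], RawData(
        score=score,
        model=model.input_id,
        dataset=dataset.input_id,
    )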

validmind/tests/model_validation/sklearn/HomogeneityScore.py

@@ -4,7 +4,7 @@
 
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -50,11 +50,16 @@ def HomogeneityScore(dataset: VMDataset, model: VMModel):
     - The score does not address the actual number of clusters formed, or the evenness of cluster sizes. It only checks
     the homogeneity within the given clusters created by the model.
     """
-    return [
-        {
-            "Homogeneity Score": metrics.homogeneity_score(
-                labels_true=dataset.y,
-                labels_pred=dataset.y_pred(model),
-            )
-        }
-    ]
+    homogeneity_score = metrics.homogeneity_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+
+    raw_data = RawData(
+        y_true=dataset.y,
+        y_pred=dataset.y_pred(model),
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
+
+    return ([{"Homogeneity Score": homogeneity_score}], raw_data)

validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -7,7 +7,7 @@ from typing import Dict, List, Union
 from sklearn.metrics import make_scorer, recall_score
 from sklearn.model_selection import GridSearchCV
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -162,4 +162,6 @@ def HyperParametersTuning(
 
         results.append(row_result)
 
-    return results
+    return results, RawData(
+        model=model.input_id, dataset=dataset.input_id, param_grid=param_grid
+    )

validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py

@@ -124,4 +124,9 @@ def KMeansClustersOptimization(
 
     fig.update_layout(showlegend=False)
 
-    return fig, RawData(distortions=distortions, silhouette_avg=silhouette_avg)
+    return fig, RawData(
+        distortions=distortions,
+        silhouette_avg=silhouette_avg,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/MinimumAccuracy.py

@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 from sklearn.metrics import accuracy_score
 
+from validmind import RawData
 from validmind.tests import tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
@@ -50,10 +51,14 @@ def MinimumAccuracy(dataset: VMDataset, model: VMModel, min_threshold: float = 0
     """
     accuracy = accuracy_score(dataset.y, dataset.y_pred(model))
 
-    return [
-        {
-            "Score": accuracy,
-            "Threshold": min_threshold,
-            "Pass/Fail": "Pass" if accuracy > min_threshold else "Fail",
-        }
-    ], accuracy > min_threshold
+    return (
+        [
+            {
+                "Score": accuracy,
+                "Threshold": min_threshold,
+                "Pass/Fail": "Pass" if accuracy > min_threshold else "Fail",
+            }
+        ],
+        accuracy > min_threshold,
+        RawData(model=model.input_id, dataset=dataset.input_id),
+    )
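
MinimumAccuracy above, and MinimumF1Score and MinimumROCAUCScore below, move from a two-element return (table, pass/fail boolean) to a three-element one that appends RawData. A sketch of the same convention in a hypothetical threshold test (the test name and metric are illustrative, not part of the package):

from sklearn.metrics import precision_score

from validmind import RawData
from validmind.tests import tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("sklearn", "model_performance")
@tasks("classification")
def MinimumPrecision(dataset: VMDataset, model: VMModel, min_threshold: float = 0.7):
    """Hypothetical threshold test mirroring the new three-tuple return."""
    precision = precision_score(dataset.y, dataset.y_pred(model))

    return (
        [
            {
                "Score": precision,
                "Threshold": min_threshold,
                "Pass/Fail": "Pass" if precision > min_threshold else "Fail",
            }
        ],
        precision > min_threshold,  # boolean drives the test's pass/fail status
        RawData(score=precision, model=model.input_id, dataset=dataset.input_id),
    )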

validmind/tests/model_validation/sklearn/MinimumF1Score.py

@@ -5,6 +5,7 @@
 import numpy as np
 from sklearn.metrics import f1_score
 
+from validmind import RawData
 from validmind.tests import tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
@@ -58,10 +59,14 @@ def MinimumF1Score(dataset: VMDataset, model: VMModel, min_threshold: float = 0.
     else:
         score = f1_score(dataset.y, dataset.y_pred(model))
 
-    return [
-        {
-            "Score": score,
-            "Threshold": min_threshold,
-            "Pass/Fail": "Pass" if score > min_threshold else "Fail",
-        }
-    ], score > min_threshold
+    return (
+        [
+            {
+                "Score": score,
+                "Threshold": min_threshold,
+                "Pass/Fail": "Pass" if score > min_threshold else "Fail",
+            }
+        ],
+        score > min_threshold,
+        RawData(score=score, model=model.input_id, dataset=dataset.input_id),
+    )

validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py

@@ -6,7 +6,7 @@ import numpy as np
 from sklearn.metrics import roc_auc_score
 from sklearn.preprocessing import LabelBinarizer
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -62,19 +62,34 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
         lb = LabelBinarizer()
         lb.fit(y_true)
 
+        y_true_binary = lb.transform(y_true)
+        y_score_binary = lb.transform(dataset.y_pred(model))
+
         roc_auc = roc_auc_score(
-            y_true=lb.transform(y_true),
-            y_score=lb.transform(dataset.y_pred(model)),
+            y_true=y_true_binary,
+            y_score=y_score_binary,
             average="macro",
         )
 
     else:
-        roc_auc = roc_auc_score(y_true=y_true, y_score=dataset.y_prob(model))
+        y_score_prob = dataset.y_prob(model)
+        roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
 
-    return [
+    results = [
         {
             "Score": roc_auc,
             "Threshold": min_threshold,
             "Pass/Fail": "Pass" if roc_auc > min_threshold else "Fail",
         }
-    ], roc_auc > min_threshold
+    ]
+
+    return (
+        results,
+        roc_auc > min_threshold,
+        RawData(
+            y_true=y_true,
+            roc_auc=roc_auc,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
+    )

validmind/tests/model_validation/sklearn/OverfitDiagnosis.py

@@ -10,7 +10,7 @@ import pandas as pd
 import seaborn as sns
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -299,4 +299,10 @@ def OverfitDiagnosis(
         }
     )
 
-    return ({"Overfit Diagnosis": test_results}, *figures)
+    return (
+        {"Overfit Diagnosis": test_results},
+        *figures,
+        RawData(
+            model=model.input_id, datasets=[dataset.input_id for dataset in datasets]
+        ),
+    )

validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py

@@ -111,4 +111,8 @@ def PermutationFeatureImportance(
         height=figure_height,
     )
 
-    return fig, RawData(permutation_importance=pfi_values)
+    return fig, RawData(
+        permutation_importance=pfi_values,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py

@@ -209,5 +209,9 @@ def PopulationStabilityIndex(
             ],
         },
         fig,
-        RawData(psi_raw=psi_results),
+        RawData(
+            psi_raw=psi_results,
+            model=model.input_id,
+            datasets=[datasets[0].input_id, datasets[1].input_id],
+        ),
     )
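
PopulationStabilityIndex above and OverfitDiagnosis earlier operate on a pair of datasets, so their RawData records a list of dataset IDs rather than a single one. A sketch of that multi-dataset convention in a hypothetical train/test comparison test (name and metric are illustrative):

from typing import List

import numpy as np

from validmind import RawData, tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("model_performance")
@tasks("classification")
def AccuracyGap(datasets: List[VMDataset], model: VMModel):
    """Hypothetical two-dataset test following the datasets=[...] RawData convention."""
    train, test = datasets
    train_acc = np.mean(train.y == train.y_pred(model))
    test_acc = np.mean(test.y == test.y_pred(model))

    return [
        {"Train Accuracy": train_acc, "Test Accuracy": test_acc, "Gap": train_acc - test_acc}
    ], RawData(
        model=model.input_id,
        datasets=[train.input_id, test.input_id],
    )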

validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py

@@ -83,4 +83,9 @@ def PrecisionRecallCurve(model: VMModel, dataset: VMDataset):
         ),
     )
 
-    return fig, RawData(precision=precision, recall=recall)
+    return fig, RawData(
+        precision=precision,
+        recall=recall,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/ROCCurve.py

@@ -103,5 +103,7 @@ def ROCCurve(model: VMModel, dataset: VMDataset):
                 height=500,
             ),
         ),
-        RawData(fpr=fpr, tpr=tpr, auc=auc),
+        RawData(
+            fpr=fpr, tpr=tpr, auc=auc, model=model.input_id, dataset=dataset.input_id
+        ),
     )

validmind/tests/model_validation/sklearn/RegressionErrors.py

@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("sklearn", "model_performance")
@@ -62,7 +62,11 @@ def RegressionErrors(model, dataset):
     y_pred = dataset.y_pred(model)
     y_true = y_true.astype(y_pred.dtype)
 
-    return _regression_errors(y_true, y_pred)
+    results_df = _regression_errors(y_true, y_pred)
+
+    return results_df, RawData(
+        y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
+    )
 
 
 def _regression_errors(y_true, y_pred):

validmind/tests/model_validation/sklearn/RegressionPerformance.py

@@ -5,7 +5,7 @@
 import numpy as np
 from sklearn.metrics import mean_absolute_error, mean_squared_error
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -74,10 +74,15 @@ def RegressionPerformance(model: VMModel, dataset: VMDataset):
     # MBD calculation
     metrics["Mean Bias Deviation (MBD)"] = np.mean(y_pred - y_true)
 
-    return [
-        {
-            "Metric": metric,
-            "Value": value,
-        }
-        for metric, value in metrics.items()
-    ]
+    return (
+        [
+            {
+                "Metric": metric,
+                "Value": value,
+            }
+            for metric, value in metrics.items()
+        ],
+        RawData(
+            y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
+        ),
+    )

validmind/tests/model_validation/sklearn/RegressionR2Square.py

@@ -5,7 +5,7 @@
 import pandas as pd
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
 
 
@@ -55,11 +55,14 @@ def RegressionR2Square(dataset, model):
     y_pred = dataset.y_pred(model)
     y_true = y_true.astype(y_pred.dtype)
 
+    r2 = metrics.r2_score(y_true, y_pred)
+    adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
+
     return pd.DataFrame(
         {
-            "R-squared (R2) Score": [metrics.r2_score(y_true, y_pred)],
-            "Adjusted R-squared (R2) Score": [
-                adj_r2_score(y_true, y_pred, len(y_true), len(dataset.feature_columns))
-            ],
+            "R-squared (R2) Score": [r2],
+            "Adjusted R-squared (R2) Score": [adj_r2],
         }
+    ), RawData(
+        y_true=y_true, y_pred=y_pred, model=model.input_id, dataset=dataset.input_id
     )

validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py

@@ -327,4 +327,8 @@ def RobustnessDiagnosis(
         results_df["Perturbation Size"] == 0.0, "Perturbation Size"
     ] = "Baseline (0.0)"
 
-    return results_df, fig, all(results_df["Passed"])
+    return (
+        results_df,
+        fig,
+        all(results_df["Passed"]),
+    )

validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py

@@ -229,5 +229,10 @@ def SHAPGlobalImportance(
     return (
         generate_shap_plot("mean", shap_values, shap_sample),
         generate_shap_plot("summary", shap_values, shap_sample),
-        RawData(shap_values=shap_values, shap_sample=shap_sample),
+        RawData(
+            shap_values=shap_values,
+            shap_sample=shap_sample,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )

validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py

@@ -5,7 +5,7 @@
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -128,4 +128,12 @@ def ScoreProbabilityAlignment(
         height=600,
     )
 
-    return results_df, fig
+    # Include raw data for post-processing
+    raw_data = RawData(
+        score_bins=df[["score_bin", score_column]],
+        predicted_probabilities=df["probability"],
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
+
+    return results_df, fig, raw_data
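
The "# Include raw data for post-processing" comment in ScoreProbabilityAlignment states the motive for threading input IDs through every RawData object in this release. A hedged sketch of consuming it, assuming run_test accepts a post_process_fn callback and exposes the raw data on the result object (both are assumptions about the surrounding API, not shown in this diff):

import validmind as vm


def inspect_raw(result):
    # Assumed attribute name; the diff only guarantees that RawData now carries
    # score_bins, predicted_probabilities, and the model/dataset input IDs.
    raw = result.raw_data
    print("produced by:", raw.model, "on", raw.dataset)
    return result


result = vm.tests.run_test(
    "validmind.model_validation.sklearn.ScoreProbabilityAlignment",
    inputs={"dataset": "scored_dataset", "model": "credit_model"},  # illustrative IDs
    post_process_fn=inspect_raw,
)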

validmind/tests/model_validation/sklearn/SilhouettePlot.py

@@ -110,5 +110,9 @@ def SilhouettePlot(model: VMModel, dataset: VMDataset):
            "Silhouette Score": silhouette_avg,
        },
        fig,
-       RawData(sample_silhouette_values=sample_silhouette_values),
+       RawData(
+           sample_silhouette_values=sample_silhouette_values,
+           model=model.input_id,
+           dataset=dataset.input_id,
+       ),
    )

validmind/tests/model_validation/sklearn/VMeasure.py

@@ -4,7 +4,7 @@
 
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -48,11 +48,14 @@ def VMeasure(dataset: VMDataset, model: VMModel):
     the other. The V Measure Score does not provide flexibility in assigning different weights to homogeneity and
     completeness.
     """
-    return [
-        {
-            "V Measure": metrics.v_measure_score(
-                labels_true=dataset.y,
-                labels_pred=dataset.y_pred(model),
-            )
-        }
-    ]
+    v_measure = metrics.v_measure_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+
+    return (
+        [{"V Measure": v_measure}],
+        RawData(
+            v_measure_score=v_measure, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
@@ -64,7 +64,11 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil
64
64
 
65
65
  fig, fig_data = _plot_cumulative_prob(df, dataset.target_column, title)
66
66
 
67
- return fig, RawData(cumulative_probabilities=fig_data)
67
+ return fig, RawData(
68
+ cumulative_probabilities=fig_data,
69
+ model=model.input_id,
70
+ dataset=dataset.input_id,
71
+ )
68
72
 
69
73
 
70
74
  def _plot_cumulative_prob(df, target_col, title):

validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py

@@ -5,7 +5,7 @@
 import pandas as pd
 from statsmodels.stats.stattools import durbin_watson
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tasks("regression")
@@ -81,4 +81,9 @@ def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]):
             "threshold": [str(threshold)],
             "autocorrelation": [get_autocorrelation(dw_statistic, threshold)],
         }
+    ), RawData(
+        residuals=residuals,
+        dw_statistic=dw_statistic,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )

validmind/tests/model_validation/statsmodels/GINITable.py

@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import roc_auc_score, roc_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_performance")
@@ -76,4 +76,11 @@ def GINITable(dataset, model):
             "GINI": [gini],
             "KS": [max(tpr - fpr)],
         }
+    ), RawData(
+        fpr=fpr,
+        tpr=tpr,
+        y_true=y_true,
+        y_prob=y_prob,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )

validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py

@@ -4,7 +4,7 @@
 
 from statsmodels.stats.diagnostic import kstest_normal
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import InvalidTestParametersError
 from validmind.vm_models import VMDataset, VMModel
 
@@ -66,4 +66,4 @@ def KolmogorovSmirnov(model: VMModel, dataset: VMDataset, dist: str = "norm"):
             "P-Value": result["pvalue"],
         }
         for k, result in ks_values.items()
-    ]
+    ], RawData(ks_values=ks_values, dataset=dataset.input_id)

validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py

@@ -6,7 +6,7 @@
 import plotly.graph_objects as go
 from matplotlib import cm
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "credit_risk")
@@ -60,7 +60,11 @@ def PredictionProbabilitiesHistogram(
 
     fig = _plot_prob_histogram(df, dataset.target_column, title)
 
-    return fig
+    return fig, RawData(
+        probabilities=df["probabilities"],
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
 
 
 def _plot_prob_histogram(df, target_col, title):

validmind/tests/model_validation/statsmodels/RegressionCoeffs.py

@@ -7,7 +7,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from scipy import stats
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 
 
@@ -97,4 +97,10 @@ def RegressionCoeffs(model):
         yaxis_title="Coefficients",
     )
 
-    return fig, coefficients
+    return (
+        fig,
+        coefficients,
+        RawData(
+            model=model.input_id, std_err=std_err, lower_ci=lower_ci, upper_ci=upper_ci
+        ),
+    )

validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py

@@ -90,4 +90,6 @@ def RegressionFeatureSignificance(
 
     plt.close()
 
-    return fig, RawData(coefficients=coefficients, pvalues=pvalues)
+    return fig, RawData(
+        coefficients=coefficients, pvalues=pvalues, model=model.input_id
+    )

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py

@@ -7,7 +7,7 @@ from typing import Union
 import matplotlib.pyplot as plt
 import pandas as pd
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -87,4 +87,9 @@ def RegressionModelForecastPlot(
 
     plt.close()
 
-    return fig
+    return fig, RawData(
+        observed_values=dataset.y.tolist(),
+        forecast_values=dataset.y_pred(model).tolist(),
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py

@@ -94,4 +94,6 @@ def RegressionModelForecastPlotLevels(
     return fig, RawData(
         y_transformed=dataset_y_transformed,
         y_pred_transformed=y_pred_transformed,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )