validmind 2.8.10__py3-none-any.whl → 2.8.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. validmind/__init__.py +6 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +17 -11
  4. validmind/ai/utils.py +2 -2
  5. validmind/api_client.py +75 -32
  6. validmind/client.py +108 -100
  7. validmind/client_config.py +3 -3
  8. validmind/datasets/classification/__init__.py +7 -3
  9. validmind/datasets/credit_risk/lending_club.py +28 -16
  10. validmind/datasets/nlp/cnn_dailymail.py +10 -4
  11. validmind/datasets/regression/__init__.py +22 -5
  12. validmind/errors.py +17 -7
  13. validmind/input_registry.py +1 -1
  14. validmind/logging.py +44 -35
  15. validmind/models/foundation.py +2 -2
  16. validmind/models/function.py +10 -3
  17. validmind/template.py +30 -22
  18. validmind/test_suites/__init__.py +2 -2
  19. validmind/tests/_store.py +13 -4
  20. validmind/tests/comparison.py +65 -33
  21. validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
  22. validmind/tests/data_validation/AutoMA.py +1 -1
  23. validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
  24. validmind/tests/data_validation/BoxPierce.py +3 -1
  25. validmind/tests/data_validation/ClassImbalance.py +4 -2
  26. validmind/tests/data_validation/DatasetDescription.py +3 -24
  27. validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
  28. validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
  29. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
  30. validmind/tests/data_validation/HighCardinality.py +5 -1
  31. validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
  32. validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
  33. validmind/tests/data_validation/IQROutliersTable.py +5 -2
  34. validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
  35. validmind/tests/data_validation/JarqueBera.py +2 -2
  36. validmind/tests/data_validation/LJungBox.py +2 -2
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  38. validmind/tests/data_validation/MissingValues.py +14 -10
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  40. validmind/tests/data_validation/MutualInformation.py +2 -1
  41. validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
  42. validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
  43. validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
  44. validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
  45. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
  46. validmind/tests/data_validation/RollingStatsPlot.py +2 -1
  47. validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
  48. validmind/tests/data_validation/SeasonalDecompose.py +1 -1
  49. validmind/tests/data_validation/ShapiroWilk.py +2 -2
  50. validmind/tests/data_validation/Skewness.py +7 -6
  51. validmind/tests/data_validation/SpreadPlot.py +1 -1
  52. validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
  53. validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
  54. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
  57. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  58. validmind/tests/data_validation/WOEBinTable.py +1 -1
  59. validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
  60. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  61. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  62. validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
  63. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  64. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
  65. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  66. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  67. validmind/tests/data_validation/nlp/TextDescription.py +1 -1
  68. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  69. validmind/tests/decorator.py +14 -11
  70. validmind/tests/load.py +38 -24
  71. validmind/tests/model_validation/BertScore.py +7 -1
  72. validmind/tests/model_validation/BleuScore.py +7 -1
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
  74. validmind/tests/model_validation/ContextualRecall.py +9 -1
  75. validmind/tests/model_validation/FeaturesAUC.py +1 -1
  76. validmind/tests/model_validation/MeteorScore.py +7 -1
  77. validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
  78. validmind/tests/model_validation/RegardScore.py +6 -1
  79. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
  80. validmind/tests/model_validation/RougeScore.py +3 -1
  81. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
  82. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
  83. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
  84. validmind/tests/model_validation/TokenDisparity.py +5 -1
  85. validmind/tests/model_validation/ToxicityScore.py +2 -0
  86. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  87. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
  88. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
  89. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
  90. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
  91. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
  92. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
  93. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
  94. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
  95. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
  96. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  97. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
  98. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  99. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
  100. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -3
  101. validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
  102. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -3
  103. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -3
  104. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -3
  105. validmind/tests/model_validation/ragas/ContextRecall.py +5 -3
  106. validmind/tests/model_validation/ragas/Faithfulness.py +5 -3
  107. validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
  108. validmind/tests/model_validation/ragas/ResponseRelevancy.py +5 -3
  109. validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -3
  110. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
  111. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
  112. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
  113. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +28 -5
  114. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  115. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
  116. validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
  117. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
  118. validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
  119. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
  120. validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
  121. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
  122. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
  123. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
  124. validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
  125. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
  126. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -3
  127. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
  128. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
  129. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
  130. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  131. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
  132. validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
  133. validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
  134. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
  135. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +34 -26
  136. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
  137. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
  138. validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
  139. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
  140. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
  141. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  142. validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
  143. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
  144. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  145. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
  146. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
  147. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
  148. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
  149. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
  150. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
  151. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  152. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
  153. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
  154. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
  155. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
  156. validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
  157. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
  158. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
  159. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
  160. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
  161. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
  162. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
  163. validmind/tests/output.py +66 -11
  164. validmind/tests/prompt_validation/Clarity.py +1 -1
  165. validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
  166. validmind/tests/prompt_validation/Robustness.py +6 -1
  167. validmind/tests/prompt_validation/Specificity.py +1 -1
  168. validmind/tests/run.py +28 -14
  169. validmind/tests/test_providers.py +28 -35
  170. validmind/tests/utils.py +17 -4
  171. validmind/unit_metrics/__init__.py +1 -1
  172. validmind/utils.py +295 -31
  173. validmind/vm_models/dataset/dataset.py +19 -16
  174. validmind/vm_models/dataset/utils.py +5 -3
  175. validmind/vm_models/figure.py +6 -6
  176. validmind/vm_models/input.py +6 -5
  177. validmind/vm_models/model.py +5 -5
  178. validmind/vm_models/result/result.py +122 -43
  179. validmind/vm_models/result/utils.py +9 -28
  180. validmind/vm_models/test_suite/__init__.py +5 -0
  181. validmind/vm_models/test_suite/runner.py +5 -5
  182. validmind/vm_models/test_suite/summary.py +20 -2
  183. validmind/vm_models/test_suite/test.py +6 -6
  184. validmind/vm_models/test_suite/test_suite.py +10 -10
  185. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/METADATA +4 -5
  186. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/RECORD +189 -188
  187. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
  188. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
  189. {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0
validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from sklearn.calibration import calibration_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel
 
@@ -217,4 +217,14 @@ def CalibrationCurveDrift(
         fig,
         {"Mean Predicted Probabilities": pred_df, "Fraction of Positives": true_df},
         pass_fail_bool,
+        RawData(
+            prob_true_ref=prob_true_ref,
+            prob_pred_ref=prob_pred_ref,
+            prob_true_mon=prob_true_mon,
+            prob_pred_mon=prob_pred_mon,
+            bin_labels=bin_labels,
+            model=model.input_id,
+            dataset_ref=datasets[0].input_id,
+            dataset_mon=datasets[1].input_id,
+        ),
     )
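The common change across the ongoing_monitoring tests in this release is that each test now also returns a RawData object capturing its intermediate values plus the input_id of the model and datasets it used. A minimal sketch of a custom monitoring test that follows the same convention (the test name, the statistic, and the RawData keyword names are illustrative, not part of the package):

import pandas as pd

from validmind import RawData, tags, tasks


@tags("ongoing_monitoring")
@tasks("classification")
def ExampleTargetRateDrift(datasets, model, drift_pct_threshold=20):
    """Compare the target rate between the reference and monitoring datasets."""
    # datasets[0] is the reference dataset, datasets[1] the monitoring dataset
    ref_rate = datasets[0].df[datasets[0].target_column].mean()
    mon_rate = datasets[1].df[datasets[1].target_column].mean()
    drift_pct = abs(mon_rate - ref_rate) / max(abs(ref_rate), 1e-9) * 100
    passed = drift_pct < drift_pct_threshold

    table = pd.DataFrame(
        [{"Reference": ref_rate, "Monitoring": mon_rate, "Drift (%)": drift_pct}]
    )

    # RawData carries the intermediates and the input IDs, mirroring the diffs above
    return (
        {"Target Rate Drift": table},
        passed,
        RawData(
            ref_rate=ref_rate,
            mon_rate=mon_rate,
            model=model.input_id,
            dataset_reference=datasets[0].input_id,
            dataset_monitoring=datasets[1].input_id,
        ),
    )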
validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import classification_report
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -145,4 +145,12 @@ def ClassificationAccuracyDrift(
     # Calculate overall pass/fail
     pass_fail_bool = (df["Pass/Fail"] == "Pass").all()
 
-    return ({"Classification Accuracy Metrics": df}, pass_fail_bool)
+    raw_data = RawData(
+        report_reference=report_ref,
+        report_monitoring=report_mon,
+        model=model.input_id,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
+
+    return ({"Classification Accuracy Metrics": df}, pass_fail_bool, raw_data)
validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import pandas as pd
 from sklearn.metrics import confusion_matrix
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -190,4 +190,11 @@ def ConfusionMatrixDrift(
     return (
         {"Confusion Matrix Metrics": metrics_df, "Sample Counts": counts_df},
         pass_fail_bool,
+        RawData(
+            confusion_matrix_reference=cm_ref,
+            confusion_matrix_monitoring=cm_mon,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
     )
validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -83,6 +83,7 @@ def CumulativePredictionProbabilitiesDrift(
     diff_color = "rgba(148, 103, 189, 0.8)"  # Purple with 0.8 opacity
 
     figures = []
+    raw_data = {}
     for class_value in classes:
         # Create figure with secondary y-axis
         fig = make_subplots(
@@ -175,4 +176,19 @@ def CumulativePredictionProbabilitiesDrift(
 
         figures.append(fig)
 
-    return tuple(figures)
+        # Store raw data for current class
+        raw_data[f"class_{class_value}_ref_probs"] = ref_probs
+        raw_data[f"class_{class_value}_mon_probs"] = mon_probs
+        raw_data[f"class_{class_value}_ref_sorted"] = ref_sorted
+        raw_data[f"class_{class_value}_ref_cumsum"] = ref_cumsum
+        raw_data[f"class_{class_value}_mon_sorted"] = mon_sorted
+        raw_data[f"class_{class_value}_mon_cumsum"] = mon_cumsum
+
+    return tuple(figures) + (
+        RawData(
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+            **raw_data,
+        ),
+    )
validmind/tests/ongoing_monitoring/FeatureDrift.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 def calculate_psi_score(actual, expected):
@@ -183,4 +183,11 @@ def FeatureDrift(
     # Calculate overall pass/fail
     pass_fail_bool = (psi_df["Pass/Fail"] == "Pass").all()
 
-    return ({"PSI Scores": psi_df}, *figures, pass_fail_bool)
+    # Prepare raw data
+    raw_data = RawData(
+        distributions=distributions,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
+
+    return ({"PSI Scores": psi_df}, *figures, pass_fail_bool, raw_data)
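FeatureDrift scores each feature with a population stability index computed by calculate_psi_score and fails when any PSI exceeds its threshold. For orientation, a generic, self-contained sketch of the PSI formula (this is not the package's implementation; the bin count and the flooring of empty bins are illustrative choices):

import numpy as np


def psi(expected: np.ndarray, actual: np.ndarray, bins: int = 10) -> float:
    """Population stability index between two samples using shared equal-width bins."""
    lo = min(expected.min(), actual.min())
    hi = max(expected.max(), actual.max())
    edges = np.linspace(lo, hi, bins + 1)

    # Bin both samples and convert counts to proportions
    e_pct = np.histogram(expected, bins=edges)[0] / len(expected)
    a_pct = np.histogram(actual, bins=edges)[0] / len(actual)

    # Floor proportions to avoid log(0) for empty bins
    e_pct = np.clip(e_pct, 1e-6, None)
    a_pct = np.clip(a_pct, 1e-6, None)

    return float(np.sum((a_pct - e_pct) * np.log(a_pct / e_pct)))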
validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py CHANGED
@@ -5,7 +5,7 @@
 
 import matplotlib.pyplot as plt
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization")
@@ -74,4 +74,10 @@ def PredictionAcrossEachFeature(datasets, model):
         figures_to_save.append(fig)
         plt.close()
 
-    return tuple(figures_to_save)
+    return tuple(figures_to_save), RawData(
+        y_prob_reference=y_prob_reference,
+        y_prob_monitoring=y_prob_monitoring,
+        model=model.input_id,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
validmind/tests/ongoing_monitoring/PredictionCorrelation.py CHANGED
@@ -5,7 +5,7 @@
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization")
@@ -140,4 +140,15 @@ def PredictionCorrelation(datasets, model, drift_pct_threshold=20):
     # Calculate overall pass/fail
     pass_fail_bool = (corr_final["Pass/Fail"] == "Pass").all()
 
-    return ({"Correlation Pair Table": corr_final}, fig, pass_fail_bool)
+    return (
+        {"Correlation Pair Table": corr_final},
+        fig,
+        pass_fail_bool,
+        RawData(
+            reference_correlations=corr_ref.to_dict(),
+            monitoring_correlations=corr_mon.to_dict(),
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py CHANGED
@@ -10,7 +10,7 @@ import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 from scipy import stats
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -201,4 +201,15 @@ def PredictionProbabilitiesHistogramDrift(
         }
     )
 
-    return fig, tables, all_passed
+    return (
+        fig,
+        tables,
+        all_passed,
+        RawData(
+            reference_probabilities=y_prob_ref,
+            monitoring_probabilities=y_prob_mon,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/ongoing_monitoring/ROCCurveDrift.py CHANGED
@@ -8,7 +8,7 @@ import numpy as np
 import plotly.graph_objects as go
 from sklearn.metrics import roc_auc_score, roc_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import SkipTestError
 from validmind.vm_models import VMDataset, VMModel
 
@@ -147,4 +147,18 @@ def ROCCurveDrift(datasets: List[VMDataset], model: VMModel):
         height=500,
     )
 
-    return fig1, fig2
+    return (
+        fig1,
+        fig2,
+        RawData(
+            fpr_ref=fpr_ref,
+            tpr_ref=tpr_ref,
+            auc_ref=auc_ref,
+            fpr_mon=fpr_mon,
+            tpr_mon=tpr_mon,
+            auc_mon=auc_mon,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/ongoing_monitoring/ScoreBandsDrift.py CHANGED
@@ -7,7 +7,7 @@ from typing import List
 import numpy as np
 import pandas as pd
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -209,4 +209,13 @@ def ScoreBandsDrift(
         tables[table_name] = pd.DataFrame(rows)
         all_passed &= metric_passed
 
-    return tables, all_passed
+    # Collect raw data
+    raw_data = RawData(
+        ref_results=ref_results,
+        mon_results=mon_results,
+        model=model.input_id,
+        dataset_reference=datasets[0].input_id,
+        dataset_monitoring=datasets[1].input_id,
+    )
+
+    return tables, all_passed, raw_data
validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py CHANGED
@@ -7,7 +7,7 @@ import plotly.figure_factory as ff
 import plotly.graph_objects as go
 from scipy.stats import kurtosis, skew
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization")
@@ -142,4 +142,15 @@ def TargetPredictionDistributionPlot(datasets, model, drift_pct_threshold=20):
 
     pass_fail_bool = (moments["Pass/Fail"] == "Pass").all()
 
-    return ({"Distribution Moments": moments}, fig, pass_fail_bool)
+    return (
+        {"Distribution Moments": moments},
+        fig,
+        pass_fail_bool,
+        RawData(
+            pred_ref=pred_ref,
+            pred_monitor=pred_monitor,
+            model=model.input_id,
+            dataset_reference=datasets[0].input_id,
+            dataset_monitoring=datasets[1].input_id,
+        ),
+    )
validmind/tests/output.py CHANGED
@@ -9,6 +9,7 @@ from uuid import uuid4
 import numpy as np
 import pandas as pd
 
+from validmind.utils import is_html, md_to_html
 from validmind.vm_models.figure import (
     Figure,
     is_matplotlib_figure,
@@ -77,30 +78,72 @@ class FigureOutputHandler(OutputHandler):
 
 class TableOutputHandler(OutputHandler):
     def can_handle(self, item: Any) -> bool:
-        return isinstance(item, (list, pd.DataFrame, dict, ResultTable))
+        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, tuple))
+
+    def _convert_simple_type(self, data: Any) -> pd.DataFrame:
+        """Convert a simple data type to a DataFrame."""
+        if isinstance(data, dict):
+            return pd.DataFrame([data])
+        elif data is None:
+            return pd.DataFrame()
+        else:
+            raise ValueError(f"Cannot convert {type(data)} to DataFrame")
+
+    def _convert_list(self, data_list: List) -> pd.DataFrame:
+        """Convert a list to a DataFrame."""
+        if not data_list:
+            return pd.DataFrame()
+
+        try:
+            return pd.DataFrame(data_list)
+        except Exception as e:
+            # If conversion fails, try to handle common cases
+            if all(
+                isinstance(item, (int, float, str, bool, type(None)))
+                for item in data_list
+            ):
+                return pd.DataFrame({"Values": data_list})
+            else:
+                raise ValueError(f"Could not convert list to DataFrame: {e}")
+
+    def _convert_to_dataframe(self, table_data: Any) -> pd.DataFrame:
+        """Convert various data types to a pandas DataFrame."""
+        # Handle special cases by type
+        if isinstance(table_data, pd.DataFrame):
+            return table_data
+        elif isinstance(table_data, (dict, str, type(None))):
+            return self._convert_simple_type(table_data)
+        elif isinstance(table_data, tuple):
+            return self._convert_list(list(table_data))
+        elif isinstance(table_data, list):
+            return self._convert_list(table_data)
+        else:
+            # If we reach here, we don't know how to handle this type
+            raise ValueError(
+                f"Invalid table format: must be a list of dictionaries or a DataFrame, got {type(table_data)}"
+            )
 
     def process(
         self,
-        item: Union[List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable],
+        item: Union[
+            List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable, str, tuple
+        ],
         result: TestResult,
     ) -> None:
+        # Convert to a dictionary of tables if not already
        tables = item if isinstance(item, dict) else {"": item}
 
        for table_name, table_data in tables.items():
-            # if already a ResultTable, add it directly
+            # If already a ResultTable, add it directly
            if isinstance(table_data, ResultTable):
                result.add_table(table_data)
                continue
 
-            if not isinstance(table_data, (list, pd.DataFrame)):
-                raise ValueError(
-                    "Invalid table format: must be a list of dictionaries or a DataFrame"
-                )
-
-            if isinstance(table_data, list):
-                table_data = pd.DataFrame(table_data)
+            # Convert the data to a DataFrame using our helper method
+            df = self._convert_to_dataframe(table_data)
 
-            result.add_table(ResultTable(data=table_data, title=table_name or None))
+            # Add the resulting DataFrame as a table to the resul
+            result.add_table(ResultTable(data=df, title=table_name or None))
 
 
 class RawDataOutputHandler(OutputHandler):
@@ -111,6 +154,17 @@ class RawDataOutputHandler(OutputHandler):
         result.raw_data = item
 
 
+class StringOutputHandler(OutputHandler):
+    def can_handle(self, item: Any) -> bool:
+        return isinstance(item, str)
+
+    def process(self, item: Any, result: TestResult) -> None:
+        if not is_html(item):
+            item = md_to_html(item, mathml=True)
+
+        result.description = item
+
+
 def process_output(item: Any, result: TestResult) -> None:
     """Process a single test output item and update the TestResult."""
     handlers = [
@@ -119,6 +173,7 @@ def process_output(item: Any, result: TestResult) -> None:
         FigureOutputHandler(),
         TableOutputHandler(),
         RawDataOutputHandler(),
+        StringOutputHandler(),
     ]
 
     for handler in handlers:
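Taken together, these output.py changes broaden what a test can return: TableOutputHandler now routes dicts, tuples, empty lists, and None through _convert_to_dataframe, and a plain string returned by a test becomes the result description via the new StringOutputHandler (markdown is converted with md_to_html). Combined with the run.py change below that only generates a description when none is set, a test can supply its own narrative. A small illustrative sketch (the test name and table contents are made up for the example):

from validmind import tags, tasks


@tags("example")
@tasks("classification")
def DatasetShapeSummary(dataset):
    # A plain dict becomes a one-row table via _convert_simple_type
    shape_table = {"Rows": len(dataset.df), "Columns": len(dataset.df.columns)}

    # A markdown string is picked up by StringOutputHandler and stored as the
    # result description, so run_test will not overwrite it with a generated one
    description = "**Dataset shape** computed directly by the test."

    return {"Dataset Shape": shape_table}, description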
validmind/tests/prompt_validation/Clarity.py CHANGED
@@ -106,5 +106,5 @@ def Clarity(model, min_threshold=7):
             }
         ],
         passed,
-        RawData(response=response),
+        RawData(response=response, model=model.input_id),
     )
validmind/tests/prompt_validation/NegativeInstruction.py CHANGED
@@ -117,5 +117,5 @@ def NegativeInstruction(model, min_threshold=7):
             }
         ],
         passed,
-        RawData(model_response=response),
+        RawData(model_response=response, model=model.input_id),
     )
validmind/tests/prompt_validation/Robustness.py CHANGED
@@ -130,5 +130,10 @@ def Robustness(model, dataset, num_tests=10):
     return (
         results,
         all(result["Pass/Fail"] == "Pass" for result in results),
-        RawData(generated_inputs=generated_inputs, responses=responses),
+        RawData(
+            generated_inputs=generated_inputs,
+            responses=responses,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )
validmind/tests/prompt_validation/Specificity.py CHANGED
@@ -113,5 +113,5 @@ def Specificity(model, min_threshold=7):
             }
         ],
         passed,
-        RawData(response=response),
+        RawData(response=response, model=model.input_id),
     )
validmind/tests/run.py CHANGED
@@ -76,7 +76,7 @@ def _get_run_metadata(**metadata: Dict[str, Any]) -> Dict[str, Any]:
 
 def _get_test_kwargs(
     test_func: callable, inputs: Dict[str, Any], params: Dict[str, Any]
-):
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """Insepect function signature to build kwargs to pass the inputs and params
     that the test function expects
 
@@ -93,7 +93,7 @@ def _get_test_kwargs(
         params (dict): Test parameters e.g. {"param1": 1, "param2": 2}
 
     Returns:
-        tuple: Tuple of input and param kwargs
+        Tuple[Dict[str, Any], Dict[str, Any]]: Tuple of input and param kwargs
     """
     input_kwargs = {}  # map function inputs (`dataset` etc) to actual objects
 
@@ -222,6 +222,7 @@ def _run_comparison_test(
     params: Union[Dict[str, Any], None],
     param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
     title: Optional[str] = None,
+    show_params: bool = True,
 ):
     """Run a comparison test i.e. a test that compares multiple outputs of a test across
     different input and/or param combinations"""
@@ -242,6 +243,7 @@ def _run_comparison_test(
             show=False,
             generate_description=False,
             title=title,
+            show_params=show_params,
         )
         for config in run_test_configs
     ]
@@ -253,7 +255,9 @@ def _run_comparison_test(
     else:
         test_doc = describe_test(test_id, raw=True)["Description"]
 
-    combined_outputs, combined_inputs, combined_params = combine_results(results)
+    combined_outputs, combined_inputs, combined_params = combine_results(
+        results, show_params
+    )
 
     return build_test_result(
         outputs=combined_outputs,
@@ -265,7 +269,12 @@ def _run_comparison_test(
     )
 
 
-def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
+def _run_test(
+    test_id: TestID,
+    inputs: Dict[str, Any],
+    params: Dict[str, Any],
+    title: Optional[str] = None,
+):
     """Run a standard test and return a TestResult object"""
     test_func = load_test(test_id)
     input_kwargs, param_kwargs = _get_test_kwargs(
@@ -282,6 +291,7 @@ def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
         test_doc=getdoc(test_func),
         inputs=input_kwargs,
         params=param_kwargs,
+        title=title,
     )
 
 
@@ -297,6 +307,7 @@ def run_test(  # noqa: C901
     generate_description: bool = True,
     title: Optional[str] = None,
     post_process_fn: Union[Callable[[TestResult], None], None] = None,
+    show_params: bool = True,
     **kwargs,
 ) -> TestResult:
     """Run a ValidMind or custom test
@@ -321,6 +332,7 @@ def run_test(  # noqa: C901
         generate_description (bool, optional): Whether to generate a description. Defaults to True.
         title (str, optional): Custom title for the test result
         post_process_fn (Callable[[TestResult], None], optional): Function to post-process the test result
+        show_params (bool, optional): Whether to include parameter values in figure titles for comparison tests. Defaults to True.
 
     Returns:
         TestResult: A TestResult object containing the test results
@@ -358,6 +370,7 @@ def run_test(  # noqa: C901
             input_grid=input_grid,
             params=params,
             param_grid=param_grid,
+            show_params=show_params,
        )
 
     elif unit_metrics:
@@ -375,7 +388,7 @@ def run_test(  # noqa: C901
         )
 
     else:
-        result = _run_test(test_id, inputs, params)
+        result = _run_test(test_id, inputs, params, title)
 
     end_time = time.perf_counter()
     result.metadata = _get_run_metadata(duration_seconds=end_time - start_time)
@@ -383,15 +396,16 @@ def run_test(  # noqa: C901
     if post_process_fn:
         result = post_process_fn(result)
 
-    result.description = get_result_description(
-        test_id=test_id,
-        test_description=result.doc,
-        tables=result.tables,
-        figures=result.figures,
-        metric=result.metric,
-        should_generate=generate_description,
-        title=title,
-    )
+    if not result.description:
+        result.description = get_result_description(
+            test_id=test_id,
+            test_description=result.doc,
+            tables=result.tables,
+            figures=result.figures,
+            metric=result.metric,
+            should_generate=generate_description,
+            title=title,
+        )
 
     if show:
         result.show()
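The new show_params flag is forwarded from run_test through _run_comparison_test to combine_results, so callers can keep parameter values out of the titles of comparison-test figures. A hedged usage sketch (the test ID, input name, and parameter name are illustrative, not confirmed by this diff):

import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": "raw_dataset"},
    param_grid={"min_percent_threshold": [5, 10, 20]},
    show_params=False,  # keep figure titles free of the parameter values
    generate_description=False,
)
result.show()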
validmind/tests/test_providers.py CHANGED
@@ -7,7 +7,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import List, Protocol
+from typing import Any, Callable, List, Protocol
 
 from validmind.logging import get_logger
 
@@ -95,45 +95,38 @@ class LocalTestProvider:
         """
         self.root_folder = os.path.abspath(root_folder)
 
-    def list_tests(self):
+    def list_tests(self) -> List[str]:
         """List all tests in the given namespace
 
         Returns:
             list: A list of test IDs
         """
-        test_ids = []
-
+        test_files = []
         for root, _, files in os.walk(self.root_folder):
-            for filename in files:
-                if not filename.endswith(".py") or filename.startswith("__"):
-                    continue
-
-                path = Path(root) / filename
-                if not _is_test_file(path):
+            for file in files:
+                if not file.endswith(".py"):
                     continue
 
-                rel_path = path.relative_to(self.root_folder)
-
-                test_id_parts = [p.stem for p in rel_path.parents if p.stem][::-1]
-                test_id_parts.append(path.stem)
-                test_ids.append(".".join(test_id_parts))
+                path = Path(os.path.join(root, file))
+                if _is_test_file(path):
+                    rel_path = os.path.relpath(path, self.root_folder)
+                    test_id = os.path.splitext(rel_path)[0].replace(os.sep, ".")
+                    test_files.append(test_id)
 
-        return sorted(test_ids)
+        return test_files
 
-    def load_test(self, test_id: str):
-        """
-        Load the test identified by the given test_id.
+    def load_test(self, test_id: str) -> Callable[..., Any]:
+        """Load the test function identified by the given test_id
 
         Args:
-            test_id (str): The identifier of the test. This corresponds to the relative
-                path of the python file from the root folder, with slashes replaced by dots
+            test_id (str): The test ID (does not contain the namespace under which
+                the test is registered)
 
         Returns:
-            The test class that matches the last part of the test_id.
+            callable: The test function
 
         Raises:
-            LocalTestProviderLoadModuleError: If the test module cannot be imported
-            LocalTestProviderLoadTestError: If the test class cannot be found in the module
+            FileNotFoundError: If the test is not found
         """
         # Convert test_id to file path
         file_path = os.path.join(self.root_folder, f"{test_id.replace('.', '/')}.py")
@@ -162,28 +155,28 @@
 
 
 class ValidMindTestProvider:
-    """Test provider for ValidMind tests"""
+    """Provider for built-in ValidMind tests"""
 
-    def __init__(self):
+    def __init__(self) -> None:
         # two subproviders: unit_metrics and normal tests
-        self.metrics_provider = LocalTestProvider(
+        self.unit_metrics_provider = LocalTestProvider(
             os.path.join(os.path.dirname(__file__), "..", "unit_metrics")
         )
-        self.tests_provider = LocalTestProvider(os.path.dirname(__file__))
+        self.test_provider = LocalTestProvider(os.path.dirname(__file__))
 
     def list_tests(self) -> List[str]:
-        """List all tests in the ValidMind test provider"""
+        """List all tests in the given namespace"""
         metric_ids = [
-            f"unit_metrics.{test}" for test in self.metrics_provider.list_tests()
+            f"unit_metrics.{test}" for test in self.unit_metrics_provider.list_tests()
        ]
-        test_ids = self.tests_provider.list_tests()
+        test_ids = self.test_provider.list_tests()
 
         return metric_ids + test_ids
 
-    def load_test(self, test_id: str) -> callable:
-        """Load a ValidMind test or unit metric"""
+    def load_test(self, test_id: str) -> Callable[..., Any]:
+        """Load the test function identified by the given test_id"""
         return (
-            self.metrics_provider.load_test(test_id.replace("unit_metrics.", ""))
+            self.unit_metrics_provider.load_test(test_id.replace("unit_metrics.", ""))
             if test_id.startswith("unit_metrics.")
-            else self.tests_provider.load_test(test_id)
+            else self.test_provider.load_test(test_id)
         )
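The reworked list_tests derives a dotted test ID from each test file's path relative to the provider's root folder. A hedged sketch of how a LocalTestProvider is typically registered and used (the folder path and namespace are illustrative, and register_test_provider is assumed from the library's public test API rather than shown in this diff):

import validmind as vm
from validmind.tests import LocalTestProvider

# Tests under /path/to/my_tests/tabular/MyCheck.py resolve to "tabular.MyCheck"
provider = LocalTestProvider("/path/to/my_tests")
print(provider.list_tests())

# Registering under a namespace makes them runnable as "my_tests.<test_id>"
vm.tests.register_test_provider(namespace="my_tests", test_provider=provider)
result = vm.tests.run_test("my_tests.tabular.MyCheck", inputs={"dataset": "raw_dataset"})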