validmind 2.7.6__py3-none-any.whl → 2.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. validmind/__init__.py +2 -0
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +32 -2
  4. validmind/api_client.py +8 -1
  5. validmind/datasets/credit_risk/lending_club.py +3 -4
  6. validmind/html_templates/content_blocks.py +1 -1
  7. validmind/tests/__types__.py +17 -0
  8. validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
  9. validmind/tests/data_validation/AutoMA.py +2 -2
  10. validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
  11. validmind/tests/data_validation/BoxPierce.py +2 -2
  12. validmind/tests/data_validation/ClassImbalance.py +2 -1
  13. validmind/tests/data_validation/DatasetDescription.py +11 -2
  14. validmind/tests/data_validation/DatasetSplit.py +2 -2
  15. validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
  16. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
  17. validmind/tests/data_validation/HighCardinality.py +9 -2
  18. validmind/tests/data_validation/HighPearsonCorrelation.py +6 -2
  19. validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
  20. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
  21. validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
  22. validmind/tests/data_validation/MutualInformation.py +6 -8
  23. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
  24. validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
  25. validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
  26. validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
  27. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
  28. validmind/tests/data_validation/RollingStatsPlot.py +21 -10
  29. validmind/tests/data_validation/ScatterPlot.py +3 -5
  30. validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
  31. validmind/tests/data_validation/SeasonalDecompose.py +12 -2
  32. validmind/tests/data_validation/Skewness.py +6 -3
  33. validmind/tests/data_validation/SpreadPlot.py +8 -3
  34. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
  35. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
  36. validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
  37. validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
  38. validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
  39. validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
  40. validmind/tests/data_validation/WOEBinPlots.py +2 -2
  41. validmind/tests/data_validation/WOEBinTable.py +11 -9
  42. validmind/tests/data_validation/nlp/CommonWords.py +2 -2
  43. validmind/tests/data_validation/nlp/Hashtags.py +2 -2
  44. validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
  45. validmind/tests/data_validation/nlp/Mentions.py +9 -6
  46. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
  47. validmind/tests/data_validation/nlp/Punctuations.py +4 -2
  48. validmind/tests/data_validation/nlp/Sentiment.py +2 -2
  49. validmind/tests/data_validation/nlp/StopWords.py +5 -4
  50. validmind/tests/data_validation/nlp/TextDescription.py +2 -2
  51. validmind/tests/data_validation/nlp/Toxicity.py +2 -2
  52. validmind/tests/model_validation/BertScore.py +2 -2
  53. validmind/tests/model_validation/BleuScore.py +2 -2
  54. validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
  55. validmind/tests/model_validation/ContextualRecall.py +2 -2
  56. validmind/tests/model_validation/FeaturesAUC.py +2 -2
  57. validmind/tests/model_validation/MeteorScore.py +2 -2
  58. validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
  59. validmind/tests/model_validation/RegardScore.py +6 -2
  60. validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
  61. validmind/tests/model_validation/RougeScore.py +6 -5
  62. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
  63. validmind/tests/model_validation/TokenDisparity.py +2 -2
  64. validmind/tests/model_validation/ToxicityScore.py +10 -2
  65. validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
  66. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
  67. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
  68. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
  69. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
  70. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
  71. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
  72. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
  73. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
  74. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
  75. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
  76. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
  77. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
  78. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
  79. validmind/tests/model_validation/embeddings/utils.py +11 -1
  80. validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
  81. validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
  82. validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
  83. validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
  84. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
  85. validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
  86. validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
  87. validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
  88. validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
  89. validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
  90. validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
  91. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
  92. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
  93. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
  94. validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
  95. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
  96. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
  97. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
  98. validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
  99. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
  100. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
  101. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
  102. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
  103. validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
  104. validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
  105. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
  106. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
  107. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
  108. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
  109. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
  110. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
  111. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
  112. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
  113. validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
  114. validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
  115. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
  116. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
  117. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
  118. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
  119. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
  120. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
  121. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +3 -1
  122. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +4 -2
  123. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +4 -2
  124. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +3 -1
  125. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +3 -1
  126. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +3 -1
  127. validmind/tests/ongoing_monitoring/FeatureDrift.py +1 -0
  128. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +1 -0
  129. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +3 -1
  130. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +1 -0
  131. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +3 -2
  132. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +4 -2
  133. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +3 -1
  134. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -3
  135. validmind/tests/prompt_validation/Bias.py +13 -9
  136. validmind/tests/prompt_validation/Clarity.py +13 -9
  137. validmind/tests/prompt_validation/Conciseness.py +13 -9
  138. validmind/tests/prompt_validation/Delimitation.py +13 -9
  139. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  140. validmind/tests/prompt_validation/Robustness.py +6 -2
  141. validmind/tests/prompt_validation/Specificity.py +13 -9
  142. validmind/tests/run.py +6 -0
  143. validmind/utils.py +7 -8
  144. {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/METADATA +1 -2
  145. {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/RECORD +148 -148
  146. {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/WHEEL +1 -1
  147. {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/LICENSE +0 -0
  148. {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/entry_points.txt +0 -0
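The hunks below share one pattern: each test module adds RawData to its validmind import and appends a RawData(...) object to the test's return tuple, so the intermediate artifacts behind each table or figure (residuals, similarity and distance matrices, evaluation frames) are preserved alongside the rendered outputs. A minimal sketch of the pattern as it would look in a custom test — the test name and column names are hypothetical, while the dataset.y / dataset.y_pred accessors and the decorators follow the code in the hunks:

    import pandas as pd
    import plotly.express as px

    from validmind import RawData, tags, tasks


    @tags("regression")
    @tasks("regression")
    def MyResidualsTest(dataset, model):
        # intermediate artifact worth keeping alongside the outputs
        residuals = dataset.y - dataset.y_pred(model)

        fig = px.histogram(x=residuals, title="Residuals")
        summary_df = pd.DataFrame({"Mean Residual": [residuals.mean()]})

        # 2.7.8-style return: tables and figures first, RawData last
        return summary_df, fig, RawData(residuals=residuals)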
validmind/tests/model_validation/ModelPredictionResiduals.py
@@ -6,7 +6,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import kstest
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("regression")
@@ -102,4 +102,4 @@ def ModelPredictionResiduals(
     # Create a summary DataFrame for the KS normality test results
     summary_df = pd.DataFrame([summary])
 
-    return (summary_df, *figures)
+    return (summary_df, *figures, RawData(residuals=residuals))

validmind/tests/model_validation/RegardScore.py
@@ -6,7 +6,7 @@ import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.tests.utils import validate_prediction
 
 
@@ -142,4 +142,8 @@ def RegardScore(dataset, model):
         ]
     ]
 
-    return (result_df, *tuple(figures))
+    return (
+        result_df,
+        *figures,
+        RawData(true_regard=true_df, pred_regard=pred_df),
+    )

validmind/tests/model_validation/RegressionResidualsPlot.py
@@ -6,7 +6,7 @@ import numpy as np
 import plotly.figure_factory as ff
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -60,8 +60,9 @@ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float
     figures = []
 
     # Residuals plot
+    residuals = y_true.flatten() - y_pred.flatten()
     fig = ff.create_distplot(
-        hist_data=[y_true.flatten() - y_pred.flatten()],
+        hist_data=[residuals],
         group_labels=["Residuals"],
         bin_size=[bin_size],
         show_hist=True,
@@ -104,4 +105,4 @@ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float
         )
     )
 
-    return tuple(figures)
+    return (*figures, RawData(residuals=residuals, y_true=y_true, y_pred=y_pred))

validmind/tests/model_validation/RougeScore.py
@@ -6,7 +6,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from rouge import Rouge
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("nlp", "text_data", "visualization")
@@ -118,7 +118,8 @@ def RougeScore(dataset, model, metric="rouge-1"):
         {"p": "Precision", "r": "Recall", "f": "F1 Score"}
     )
 
-    # Create a DataFrame from all collected statistics
-    result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
-
-    return (result_df, *tuple(figures))
+    return (
+        pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"}),
+        *figures,
+        RawData(rouge_scores_df=df_scores),
+    )

validmind/tests/model_validation/TimeSeriesPredictionWithCI.py
@@ -7,7 +7,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import norm
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_predictions", "visualization")
@@ -144,4 +144,13 @@ def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
         template="plotly_white",
    )
 
-    return fig, breaches_df
+    return (
+        fig,
+        breaches_df,
+        RawData(
+            errors=errors,
+            z_score=z_score,
+            lower_confidence=lower_conf,
+            upper_confidence=upper_conf,
+        ),
+    )
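The RawData fields above (errors, z_score, lower_confidence, upper_confidence) are the intermediates behind the plotted confidence band. A rough sketch of how such a band is typically derived from a normal quantile — an illustration under assumed variable names, not the package's exact computation:

    import numpy as np
    from scipy.stats import norm

    confidence = 0.95
    errors = y_true - y_pred  # assumed point-wise forecast errors
    z_score = norm.ppf(1 - (1 - confidence) / 2)  # two-sided 95% -> ~1.96
    band = z_score * np.std(errors)
    lower_conf = y_pred - band  # lower_confidence in RawData
    upper_conf = y_pred + band  # upper_confidence in RawData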
validmind/tests/model_validation/TokenDisparity.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("nlp", "text_data", "visualization")
@@ -108,4 +108,4 @@ def TokenDisparity(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *tuple(figures))
+    return (result_df, *figures, RawData(token_counts_df=df))

validmind/tests/model_validation/ToxicityScore.py
@@ -6,7 +6,7 @@ import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("nlp", "text_data", "visualization")
@@ -139,4 +139,12 @@ def ToxicityScore(dataset, model):
         ]
     ]
 
-    return (result_df, *tuple(figures))
+    return (
+        result_df,
+        *tuple(figures),
+        RawData(
+            input_toxicity_df=input_df,
+            true_toxicity_df=true_df,
+            pred_toxicity_df=pred_df,
+        ),
+    )

validmind/tests/model_validation/embeddings/ClusterDistribution.py
@@ -5,7 +5,7 @@
 import plotly.express as px
 from sklearn.cluster import KMeans
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -52,8 +52,14 @@ def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int =
    - Uses the KMeans clustering algorithm, which assumes that clusters are convex and isotropic, and may not work as
    intended if the true clusters in the data are not of this shape.
    """
-    return px.histogram(
-        KMeans(n_clusters=num_clusters).fit(dataset.y_pred(model)).labels_,
+    embeddings = dataset.y_pred(model)
+    kmeans = KMeans(n_clusters=num_clusters).fit(embeddings)
+    labels = kmeans.labels_
+
+    fig = px.histogram(
+        labels,
         nbins=num_clusters,
         title="Embeddings Cluster Distribution",
     )
+
+    return fig, RawData(labels=labels)

validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py
@@ -9,7 +9,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -63,6 +63,7 @@ def CosineSimilarityComparison(dataset, models):
     figures = []
     # Initialize a list to store data for the DataFrame
     all_stats = []
+    similarity_matrices = []
 
     # Generate all pairs of models for comparison
     for model_A, model_B in combinations(models, 2):
@@ -73,6 +74,15 @@ def CosineSimilarityComparison(dataset, models):
         similarity_matrix = cosine_similarity(embeddings_A, embeddings_B)
         similarities = similarity_matrix.flatten()
 
+        # store similarity matrix
+        similarity_matrices.append(
+            {
+                "model_A": model_A.input_id,
+                "model_B": model_B.input_id,
+                "similarity_matrix": similarity_matrix,
+            }
+        )
+
         # Generate statistics and add model combination as a column
         stats_data = {
             "Combination": f"{model_A.input_id} vs {model_B.input_id}",
@@ -100,4 +110,8 @@ def CosineSimilarityComparison(dataset, models):
     # Create a DataFrame from all collected statistics
     stats_df = pd.DataFrame(all_stats)
 
-    return (stats_df, *tuple(figures))
+    return (
+        *figures,
+        stats_df,
+        RawData(similarity_matrices=pd.DataFrame(similarity_matrices)),
+    )
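Because each pair-wise matrix is stored as one record in a DataFrame, pulling a specific matrix back out of the RawData is a simple filter. A sketch, where raw_df stands for the stored similarity_matrices DataFrame and "model_1"/"model_2" are hypothetical input_id values:

    # raw_df columns: model_A, model_B, similarity_matrix
    row = raw_df[(raw_df["model_A"] == "model_1") & (raw_df["model_B"] == "model_2")]
    matrix = row["similarity_matrix"].iloc[0]  # full 2-D numpy array for that pair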
validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py
@@ -5,7 +5,7 @@
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -52,9 +52,11 @@ def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
    - The output is sensitive to the choice of bin number for the histogram. Different bin numbers could give a
    slightly altered perspective on the distribution of cosine similarity.
    """
+    similarity_scores = cosine_similarity(dataset.y_pred(model)).flatten()
+
     return px.histogram(
-        x=cosine_similarity(dataset.y_pred(model)).flatten(),
+        x=similarity_scores,
         nbins=100,
         title="Cosine Similarity Distribution",
         labels={"x": "Cosine Similarity"},
-    )
+    ), RawData(similarity_scores=similarity_scores)

validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py
@@ -6,7 +6,7 @@ import numpy as np
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -81,4 +81,4 @@ def CosineSimilarityHeatmap(
         yaxis_title=yaxis_title,
     )
 
-    return fig
+    return fig, RawData(similarity_matrix=similarity_matrix)

validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py
@@ -5,7 +5,7 @@
 import numpy as np
 import plotly.express as px
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -55,17 +55,27 @@ def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
    - While it displays valuable information about the central tendency and spread of data, it does not provide
    information about correlations between different embedding dimensions.
    """
+    y_pred = dataset.y_pred(model)
+    embedding_means = np.mean(y_pred, axis=0)
+    embedding_medians = np.median(y_pred, axis=0)
+    embedding_stds = np.std(y_pred, axis=0)
+
     return (
         px.histogram(
-            x=np.mean(dataset.y_pred(model), axis=0),
+            x=embedding_means,
             title="Distribution of Embedding Means",
         ),
         px.histogram(
-            x=np.median(dataset.y_pred(model), axis=0),
+            x=embedding_medians,
             title="Distribution of Embedding Medians",
         ),
         px.histogram(
-            x=np.std(dataset.y_pred(model), axis=0),
+            x=embedding_stds,
             title="Distribution of Embedding Standard Deviations",
         ),
+        RawData(
+            embedding_means=embedding_means,
+            embedding_medians=embedding_medians,
+            embedding_stds=embedding_stds,
+        ),
     )

validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py
@@ -7,7 +7,7 @@ from typing import Union
 import plotly.express as px
 from sklearn.manifold import TSNE
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -89,4 +89,4 @@ def EmbeddingsVisualization2D(
     fig = px.scatter(**scatter_kwargs)
     fig.update_layout(width=500, height=500)
 
-    return fig
+    return fig, RawData(tsne_embeddings=reduced_embeddings)
validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py
@@ -9,7 +9,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import euclidean_distances
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -57,6 +57,8 @@ def EuclideanDistanceComparison(dataset, models):
     figures = []
     all_stats = []
 
+    distance_matrices = []
+
     # Generate all pairs of models for comparison
     for model_A, model_B in combinations(models, 2):
         embeddings_A = np.stack(dataset.y_pred(model_A))
@@ -66,6 +68,15 @@ def EuclideanDistanceComparison(dataset, models):
         distance_matrix = euclidean_distances(embeddings_A, embeddings_B)
         distances = distance_matrix.flatten()
 
+        # Store raw distance matrix for each pair-wise comparison
+        distance_matrices.append(
+            {
+                "model_A": model_A.input_id,
+                "model_B": model_B.input_id,
+                "distance_matrix": distance_matrix,
+            }
+        )
+
         # Generate statistics and add model combination as a column
         stats_data = {
             "Combination": f"{model_A.input_id} vs {model_B.input_id}",
@@ -93,4 +104,7 @@ def EuclideanDistanceComparison(dataset, models):
     # Create a DataFrame from all collected statistics
     stats_df = pd.DataFrame(all_stats)
 
-    return (stats_df, *tuple(figures))
+    # Add raw data to return
+    raw_data = RawData(distance_matrices=pd.DataFrame(distance_matrices))
+
+    return (stats_df, *figures, raw_data)
validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py
@@ -6,7 +6,7 @@ import numpy as np
 import plotly.express as px
 from sklearn.metrics.pairwise import euclidean_distances
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -79,4 +79,4 @@ def EuclideanDistanceHeatmap(
         yaxis_title=yaxis_title,
     )
 
-    return fig
+    return fig, RawData(distance_matrix=distance_matrix)

validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py
@@ -10,7 +10,7 @@ import plotly.express as px
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -74,7 +74,7 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
     )
 
     # List to store each plot
-    plots = []
+    figures = []
 
     # Create plots for each pair of principal components
     for pc1, pc2 in itertools.combinations(range(1, n_components + 1), 2):
@@ -88,7 +88,6 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
                 f"PC{pc2}": f"Principal Component {pc2}",
             },
         )
-        plots.append(fig)
+        figures.append(fig)
 
-    # Return the list of plots as a tuple
-    return tuple(plots)
+    return (*figures, RawData(pca_results=pca_df))

validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py
@@ -5,7 +5,7 @@
 import re
 from typing import Dict
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .utils import create_stability_analysis_result
@@ -91,8 +91,10 @@ def StabilityAnalysisKeyword(
         perturb_data
     )
 
-    return create_stability_analysis_result(
+    raw_data, results = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return results, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py
@@ -5,7 +5,7 @@
 import random
 import string
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .utils import create_stability_analysis_result
@@ -145,8 +145,10 @@ def StabilityAnalysisRandomNoise(
         perturb_data
     )
 
-    return create_stability_analysis_result(
+    raw_data, result = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return result, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py
@@ -7,7 +7,7 @@ import random
 import nltk
 from nltk.corpus import wordnet as wn
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .utils import create_stability_analysis_result
@@ -101,8 +101,10 @@ def StabilityAnalysisSynonyms(
         perturb_data
     )
 
-    return create_stability_analysis_result(
+    raw_data, result = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return result, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py
@@ -4,7 +4,7 @@
 
 from transformers import MarianMTModel, MarianTokenizer
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -128,8 +128,10 @@ def StabilityAnalysisTranslation(
         perturb_data
     )
 
-    return create_stability_analysis_result(
+    raw_data, result = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return result, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py
@@ -10,7 +10,7 @@ import plotly.express as px
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -80,7 +80,7 @@ def TSNEComponentsPairwisePlots(
     )
 
     # List to store each plot
-    plots = []
+    figures = []
 
     # Create plots for each pair of t-SNE components (if n_components > 1)
     if n_components > 1:
@@ -95,7 +95,7 @@ def TSNEComponentsPairwisePlots(
                     f"Component {comp2}": f"Component {comp2}",
                },
            )
-            plots.append(fig)
+            figures.append(fig)
     else:
         fig = px.scatter(
             tsne_df,
@@ -106,7 +106,9 @@ def TSNEComponentsPairwisePlots(
                 "Component 1": "Component 1",
            },
        )
-        plots.append(fig)
+        figures.append(fig)
 
-    # Return the list of plots as a tuple
-    return tuple(plots)
+    return (
+        *figures,
+        RawData(embeddings_scaled=embeddings_scaled, tsne_results=tsne_results),
+    )

validmind/tests/model_validation/embeddings/utils.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import numpy as np
+import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -17,10 +18,19 @@ def create_stability_analysis_result(
         original_embeddings, perturbed_embeddings
     ).diagonal()
 
+    # create a raw dataframe of the original, perturbed and similarity
+    raw_data = pd.DataFrame(
+        {
+            "original": original_embeddings,
+            "perturbed": perturbed_embeddings,
+            "similarity": similarities,
+        }
+    )
+
     mean = np.mean(similarities)
     passed = mean > mean_similarity_threshold
 
-    return (
+    return raw_data, (
         [
             {
                 "Mean Similarity": mean,
validmind/tests/model_validation/ragas/AnswerCorrectness.py
@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -144,4 +144,5 @@ def AnswerCorrectness(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/AspectCritic.py
@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -187,9 +187,13 @@ def AspectCritic(
         title="Aspect Critique Results",
     )
 
-    return {
-        "Aspect Scores": [
-            {"Aspect": aspect, "Score": result_df[aspect].mean()}
-            for aspect in aspects + [aspect.name for aspect in custom_aspects]
-        ]
-    }, fig
+    return (
+        {
+            "Aspect Scores": [
+                {"Aspect": aspect, "Score": result_df[aspect].mean()}
+                for aspect in aspects + [aspect.name for aspect in custom_aspects]
+            ]
+        },
+        fig,
+        RawData(evaluation_results=result_df),
+    )

validmind/tests/model_validation/ragas/ContextEntityRecall.py
@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -143,4 +143,5 @@ def ContextEntityRecall(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ContextPrecision.py
@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -135,4 +135,5 @@ def ContextPrecision(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py
@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -130,4 +130,5 @@ def ContextPrecisionWithoutReference(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ContextRecall.py
@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -135,4 +135,5 @@ def ContextRecall(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )