validmind 2.7.6__py3-none-any.whl → 2.7.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +2 -0
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +32 -2
- validmind/api_client.py +8 -1
- validmind/datasets/credit_risk/lending_club.py +3 -4
- validmind/html_templates/content_blocks.py +1 -1
- validmind/tests/__types__.py +17 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
- validmind/tests/data_validation/AutoMA.py +2 -2
- validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
- validmind/tests/data_validation/BoxPierce.py +2 -2
- validmind/tests/data_validation/ClassImbalance.py +2 -1
- validmind/tests/data_validation/DatasetDescription.py +11 -2
- validmind/tests/data_validation/DatasetSplit.py +2 -2
- validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
- validmind/tests/data_validation/HighCardinality.py +9 -2
- validmind/tests/data_validation/HighPearsonCorrelation.py +6 -2
- validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
- validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
- validmind/tests/data_validation/MutualInformation.py +6 -8
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
- validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
- validmind/tests/data_validation/RollingStatsPlot.py +21 -10
- validmind/tests/data_validation/ScatterPlot.py +3 -5
- validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
- validmind/tests/data_validation/SeasonalDecompose.py +12 -2
- validmind/tests/data_validation/Skewness.py +6 -3
- validmind/tests/data_validation/SpreadPlot.py +8 -3
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
- validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
- validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
- validmind/tests/data_validation/WOEBinPlots.py +2 -2
- validmind/tests/data_validation/WOEBinTable.py +11 -9
- validmind/tests/data_validation/nlp/CommonWords.py +2 -2
- validmind/tests/data_validation/nlp/Hashtags.py +2 -2
- validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
- validmind/tests/data_validation/nlp/Mentions.py +9 -6
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
- validmind/tests/data_validation/nlp/Punctuations.py +4 -2
- validmind/tests/data_validation/nlp/Sentiment.py +2 -2
- validmind/tests/data_validation/nlp/StopWords.py +5 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -2
- validmind/tests/data_validation/nlp/Toxicity.py +2 -2
- validmind/tests/model_validation/BertScore.py +2 -2
- validmind/tests/model_validation/BleuScore.py +2 -2
- validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
- validmind/tests/model_validation/ContextualRecall.py +2 -2
- validmind/tests/model_validation/FeaturesAUC.py +2 -2
- validmind/tests/model_validation/MeteorScore.py +2 -2
- validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
- validmind/tests/model_validation/RegardScore.py +6 -2
- validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
- validmind/tests/model_validation/RougeScore.py +6 -5
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
- validmind/tests/model_validation/TokenDisparity.py +2 -2
- validmind/tests/model_validation/ToxicityScore.py +10 -2
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
- validmind/tests/model_validation/embeddings/utils.py +11 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
- validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
- validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
- validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
- validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +3 -1
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +3 -1
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +3 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +3 -1
- validmind/tests/ongoing_monitoring/FeatureDrift.py +1 -0
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +1 -0
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +3 -1
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +1 -0
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +3 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +3 -1
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -3
- validmind/tests/prompt_validation/Bias.py +13 -9
- validmind/tests/prompt_validation/Clarity.py +13 -9
- validmind/tests/prompt_validation/Conciseness.py +13 -9
- validmind/tests/prompt_validation/Delimitation.py +13 -9
- validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
- validmind/tests/prompt_validation/Robustness.py +6 -2
- validmind/tests/prompt_validation/Specificity.py +13 -9
- validmind/tests/run.py +6 -0
- validmind/utils.py +7 -8
- {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/METADATA +1 -2
- {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/RECORD +148 -148
- {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/WHEEL +1 -1
- {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/LICENSE +0 -0
- {validmind-2.7.6.dist-info → validmind-2.7.8.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@ import pandas as pd
|
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
from scipy.stats import kstest
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("regression")
|
@@ -102,4 +102,4 @@ def ModelPredictionResiduals(
|
|
102
102
|
# Create a summary DataFrame for the KS normality test results
|
103
103
|
summary_df = pd.DataFrame([summary])
|
104
104
|
|
105
|
-
return (summary_df, *figures)
|
105
|
+
return (summary_df, *figures, RawData(residuals=residuals))
|
@@ -6,7 +6,7 @@ import evaluate
|
|
6
6
|
import pandas as pd
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
from validmind.tests.utils import validate_prediction
|
11
11
|
|
12
12
|
|
@@ -142,4 +142,8 @@ def RegardScore(dataset, model):
|
|
142
142
|
]
|
143
143
|
]
|
144
144
|
|
145
|
-
return (
|
145
|
+
return (
|
146
|
+
result_df,
|
147
|
+
*figures,
|
148
|
+
RawData(true_regard=true_df, pred_regard=pred_df),
|
149
|
+
)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import plotly.figure_factory as ff
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
from validmind.vm_models import VMDataset, VMModel
|
11
11
|
|
12
12
|
|
@@ -60,8 +60,9 @@ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float
|
|
60
60
|
figures = []
|
61
61
|
|
62
62
|
# Residuals plot
|
63
|
+
residuals = y_true.flatten() - y_pred.flatten()
|
63
64
|
fig = ff.create_distplot(
|
64
|
-
hist_data=[
|
65
|
+
hist_data=[residuals],
|
65
66
|
group_labels=["Residuals"],
|
66
67
|
bin_size=[bin_size],
|
67
68
|
show_hist=True,
|
@@ -104,4 +105,4 @@ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float
|
|
104
105
|
)
|
105
106
|
)
|
106
107
|
|
107
|
-
return
|
108
|
+
return (*figures, RawData(residuals=residuals, y_true=y_true, y_pred=y_pred))
|
@@ -6,7 +6,7 @@ import pandas as pd
|
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
from rouge import Rouge
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("nlp", "text_data", "visualization")
|
@@ -118,7 +118,8 @@ def RougeScore(dataset, model, metric="rouge-1"):
|
|
118
118
|
{"p": "Precision", "r": "Recall", "f": "F1 Score"}
|
119
119
|
)
|
120
120
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
121
|
+
return (
|
122
|
+
pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"}),
|
123
|
+
*figures,
|
124
|
+
RawData(rouge_scores_df=df_scores),
|
125
|
+
)
|
@@ -7,7 +7,7 @@ import pandas as pd
|
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
from scipy.stats import norm
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
|
12
12
|
|
13
13
|
@tags("model_predictions", "visualization")
|
@@ -144,4 +144,13 @@ def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
|
|
144
144
|
template="plotly_white",
|
145
145
|
)
|
146
146
|
|
147
|
-
return
|
147
|
+
return (
|
148
|
+
fig,
|
149
|
+
breaches_df,
|
150
|
+
RawData(
|
151
|
+
errors=errors,
|
152
|
+
z_score=z_score,
|
153
|
+
lower_confidence=lower_conf,
|
154
|
+
upper_confidence=upper_conf,
|
155
|
+
),
|
156
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tags("nlp", "text_data", "visualization")
|
@@ -108,4 +108,4 @@ def TokenDisparity(dataset, model):
|
|
108
108
|
# Create a DataFrame from all collected statistics
|
109
109
|
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
|
110
110
|
|
111
|
-
return (result_df, *
|
111
|
+
return (result_df, *figures, RawData(token_counts_df=df))
|
@@ -6,7 +6,7 @@ import evaluate
|
|
6
6
|
import pandas as pd
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("nlp", "text_data", "visualization")
|
@@ -139,4 +139,12 @@ def ToxicityScore(dataset, model):
|
|
139
139
|
]
|
140
140
|
]
|
141
141
|
|
142
|
-
return (
|
142
|
+
return (
|
143
|
+
result_df,
|
144
|
+
*tuple(figures),
|
145
|
+
RawData(
|
146
|
+
input_toxicity_df=input_df,
|
147
|
+
true_toxicity_df=true_df,
|
148
|
+
pred_toxicity_df=pred_df,
|
149
|
+
),
|
150
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import plotly.express as px
|
6
6
|
from sklearn.cluster import KMeans
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
|
@@ -52,8 +52,14 @@ def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int =
|
|
52
52
|
- Uses the KMeans clustering algorithm, which assumes that clusters are convex and isotropic, and may not work as
|
53
53
|
intended if the true clusters in the data are not of this shape.
|
54
54
|
"""
|
55
|
-
|
56
|
-
|
55
|
+
embeddings = dataset.y_pred(model)
|
56
|
+
kmeans = KMeans(n_clusters=num_clusters).fit(embeddings)
|
57
|
+
labels = kmeans.labels_
|
58
|
+
|
59
|
+
fig = px.histogram(
|
60
|
+
labels,
|
57
61
|
nbins=num_clusters,
|
58
62
|
title="Embeddings Cluster Distribution",
|
59
63
|
)
|
64
|
+
|
65
|
+
return fig, RawData(labels=labels)
|
@@ -9,7 +9,7 @@ import pandas as pd
|
|
9
9
|
import plotly.express as px
|
10
10
|
from sklearn.metrics.pairwise import cosine_similarity
|
11
11
|
|
12
|
-
from validmind import tags, tasks
|
12
|
+
from validmind import RawData, tags, tasks
|
13
13
|
|
14
14
|
|
15
15
|
@tags("visualization", "dimensionality_reduction", "embeddings")
|
@@ -63,6 +63,7 @@ def CosineSimilarityComparison(dataset, models):
|
|
63
63
|
figures = []
|
64
64
|
# Initialize a list to store data for the DataFrame
|
65
65
|
all_stats = []
|
66
|
+
similarity_matrices = []
|
66
67
|
|
67
68
|
# Generate all pairs of models for comparison
|
68
69
|
for model_A, model_B in combinations(models, 2):
|
@@ -73,6 +74,15 @@ def CosineSimilarityComparison(dataset, models):
|
|
73
74
|
similarity_matrix = cosine_similarity(embeddings_A, embeddings_B)
|
74
75
|
similarities = similarity_matrix.flatten()
|
75
76
|
|
77
|
+
# store similarity matrix
|
78
|
+
similarity_matrices.append(
|
79
|
+
{
|
80
|
+
"model_A": model_A.input_id,
|
81
|
+
"model_B": model_B.input_id,
|
82
|
+
"similarity_matrix": similarity_matrix,
|
83
|
+
}
|
84
|
+
)
|
85
|
+
|
76
86
|
# Generate statistics and add model combination as a column
|
77
87
|
stats_data = {
|
78
88
|
"Combination": f"{model_A.input_id} vs {model_B.input_id}",
|
@@ -100,4 +110,8 @@ def CosineSimilarityComparison(dataset, models):
|
|
100
110
|
# Create a DataFrame from all collected statistics
|
101
111
|
stats_df = pd.DataFrame(all_stats)
|
102
112
|
|
103
|
-
return (
|
113
|
+
return (
|
114
|
+
*figures,
|
115
|
+
stats_df,
|
116
|
+
RawData(similarity_matrices=pd.DataFrame(similarity_matrices)),
|
117
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import plotly.express as px
|
6
6
|
from sklearn.metrics.pairwise import cosine_similarity
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
|
@@ -52,9 +52,11 @@ def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
|
|
52
52
|
- The output is sensitive to the choice of bin number for the histogram. Different bin numbers could give a
|
53
53
|
slightly altered perspective on the distribution of cosine similarity.
|
54
54
|
"""
|
55
|
+
similarity_scores = cosine_similarity(dataset.y_pred(model)).flatten()
|
56
|
+
|
55
57
|
return px.histogram(
|
56
|
-
x=
|
58
|
+
x=similarity_scores,
|
57
59
|
nbins=100,
|
58
60
|
title="Cosine Similarity Distribution",
|
59
61
|
labels={"x": "Cosine Similarity"},
|
60
|
-
)
|
62
|
+
), RawData(similarity_scores=similarity_scores)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import plotly.express as px
|
7
7
|
from sklearn.metrics.pairwise import cosine_similarity
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("visualization", "dimensionality_reduction", "embeddings")
|
@@ -81,4 +81,4 @@ def CosineSimilarityHeatmap(
|
|
81
81
|
yaxis_title=yaxis_title,
|
82
82
|
)
|
83
83
|
|
84
|
-
return fig
|
84
|
+
return fig, RawData(similarity_matrix=similarity_matrix)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import numpy as np
|
6
6
|
import plotly.express as px
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
|
@@ -55,17 +55,27 @@ def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
|
|
55
55
|
- While it displays valuable information about the central tendency and spread of data, it does not provide
|
56
56
|
information about correlations between different embedding dimensions.
|
57
57
|
"""
|
58
|
+
y_pred = dataset.y_pred(model)
|
59
|
+
embedding_means = np.mean(y_pred, axis=0)
|
60
|
+
embedding_medians = np.median(y_pred, axis=0)
|
61
|
+
embedding_stds = np.std(y_pred, axis=0)
|
62
|
+
|
58
63
|
return (
|
59
64
|
px.histogram(
|
60
|
-
x=
|
65
|
+
x=embedding_means,
|
61
66
|
title="Distribution of Embedding Means",
|
62
67
|
),
|
63
68
|
px.histogram(
|
64
|
-
x=
|
69
|
+
x=embedding_medians,
|
65
70
|
title="Distribution of Embedding Medians",
|
66
71
|
),
|
67
72
|
px.histogram(
|
68
|
-
x=
|
73
|
+
x=embedding_stds,
|
69
74
|
title="Distribution of Embedding Standard Deviations",
|
70
75
|
),
|
76
|
+
RawData(
|
77
|
+
embedding_means=embedding_means,
|
78
|
+
embedding_medians=embedding_medians,
|
79
|
+
embedding_stds=embedding_stds,
|
80
|
+
),
|
71
81
|
)
|
@@ -7,7 +7,7 @@ from typing import Union
|
|
7
7
|
import plotly.express as px
|
8
8
|
from sklearn.manifold import TSNE
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.logging import get_logger
|
12
12
|
from validmind.vm_models import VMDataset, VMModel
|
13
13
|
|
@@ -89,4 +89,4 @@ def EmbeddingsVisualization2D(
|
|
89
89
|
fig = px.scatter(**scatter_kwargs)
|
90
90
|
fig.update_layout(width=500, height=500)
|
91
91
|
|
92
|
-
return fig
|
92
|
+
return fig, RawData(tsne_embeddings=reduced_embeddings)
|
@@ -9,7 +9,7 @@ import pandas as pd
|
|
9
9
|
import plotly.express as px
|
10
10
|
from sklearn.metrics.pairwise import euclidean_distances
|
11
11
|
|
12
|
-
from validmind import tags, tasks
|
12
|
+
from validmind import RawData, tags, tasks
|
13
13
|
|
14
14
|
|
15
15
|
@tags("visualization", "dimensionality_reduction", "embeddings")
|
@@ -57,6 +57,8 @@ def EuclideanDistanceComparison(dataset, models):
|
|
57
57
|
figures = []
|
58
58
|
all_stats = []
|
59
59
|
|
60
|
+
distance_matrices = {}
|
61
|
+
|
60
62
|
# Generate all pairs of models for comparison
|
61
63
|
for model_A, model_B in combinations(models, 2):
|
62
64
|
embeddings_A = np.stack(dataset.y_pred(model_A))
|
@@ -66,6 +68,15 @@ def EuclideanDistanceComparison(dataset, models):
|
|
66
68
|
distance_matrix = euclidean_distances(embeddings_A, embeddings_B)
|
67
69
|
distances = distance_matrix.flatten()
|
68
70
|
|
71
|
+
# Store raw distance matrix for each pair-wise comparison
|
72
|
+
distance_matrices.append(
|
73
|
+
{
|
74
|
+
"model_A": model_A.input_id,
|
75
|
+
"model_B": model_B.input_id,
|
76
|
+
"distance_matrix": distance_matrix,
|
77
|
+
}
|
78
|
+
)
|
79
|
+
|
69
80
|
# Generate statistics and add model combination as a column
|
70
81
|
stats_data = {
|
71
82
|
"Combination": f"{model_A.input_id} vs {model_B.input_id}",
|
@@ -93,4 +104,7 @@ def EuclideanDistanceComparison(dataset, models):
|
|
93
104
|
# Create a DataFrame from all collected statistics
|
94
105
|
stats_df = pd.DataFrame(all_stats)
|
95
106
|
|
96
|
-
|
107
|
+
# Add raw data to return
|
108
|
+
raw_data = RawData(distance_matrices=pd.DataFrame(distance_matrices))
|
109
|
+
|
110
|
+
return (stats_df, *figures, raw_data)
|
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import plotly.express as px
|
7
7
|
from sklearn.metrics.pairwise import euclidean_distances
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
|
11
11
|
|
12
12
|
@tags("visualization", "dimensionality_reduction", "embeddings")
|
@@ -79,4 +79,4 @@ def EuclideanDistanceHeatmap(
|
|
79
79
|
yaxis_title=yaxis_title,
|
80
80
|
)
|
81
81
|
|
82
|
-
return fig
|
82
|
+
return fig, RawData(distance_matrix=distance_matrix)
|
@@ -10,7 +10,7 @@ import plotly.express as px
|
|
10
10
|
from sklearn.decomposition import PCA
|
11
11
|
from sklearn.preprocessing import StandardScaler
|
12
12
|
|
13
|
-
from validmind import tags, tasks
|
13
|
+
from validmind import RawData, tags, tasks
|
14
14
|
|
15
15
|
|
16
16
|
@tags("visualization", "dimensionality_reduction", "embeddings")
|
@@ -74,7 +74,7 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
|
|
74
74
|
)
|
75
75
|
|
76
76
|
# List to store each plot
|
77
|
-
|
77
|
+
figures = []
|
78
78
|
|
79
79
|
# Create plots for each pair of principal components
|
80
80
|
for pc1, pc2 in itertools.combinations(range(1, n_components + 1), 2):
|
@@ -88,7 +88,6 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
|
|
88
88
|
f"PC{pc2}": f"Principal Component {pc2}",
|
89
89
|
},
|
90
90
|
)
|
91
|
-
|
91
|
+
figures.append(fig)
|
92
92
|
|
93
|
-
|
94
|
-
return tuple(plots)
|
93
|
+
return (*figures, RawData(pca_results=pca_df))
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import re
|
6
6
|
from typing import Dict
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
from .utils import create_stability_analysis_result
|
@@ -91,8 +91,10 @@ def StabilityAnalysisKeyword(
|
|
91
91
|
perturb_data
|
92
92
|
)
|
93
93
|
|
94
|
-
|
94
|
+
raw_data, results = create_stability_analysis_result(
|
95
95
|
dataset.y_pred(model),
|
96
96
|
model.predict(perturbed_df),
|
97
97
|
mean_similarity_threshold,
|
98
98
|
)
|
99
|
+
|
100
|
+
return results, RawData(original_perturbed_similarity=raw_data)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import random
|
6
6
|
import string
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
from .utils import create_stability_analysis_result
|
@@ -145,8 +145,10 @@ def StabilityAnalysisRandomNoise(
|
|
145
145
|
perturb_data
|
146
146
|
)
|
147
147
|
|
148
|
-
|
148
|
+
raw_data, result = create_stability_analysis_result(
|
149
149
|
dataset.y_pred(model),
|
150
150
|
model.predict(perturbed_df),
|
151
151
|
mean_similarity_threshold,
|
152
152
|
)
|
153
|
+
|
154
|
+
return result, RawData(original_perturbed_similarity=raw_data)
|
@@ -7,7 +7,7 @@ import random
|
|
7
7
|
import nltk
|
8
8
|
from nltk.corpus import wordnet as wn
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.vm_models import VMDataset, VMModel
|
12
12
|
|
13
13
|
from .utils import create_stability_analysis_result
|
@@ -101,8 +101,10 @@ def StabilityAnalysisSynonyms(
|
|
101
101
|
perturb_data
|
102
102
|
)
|
103
103
|
|
104
|
-
|
104
|
+
raw_data, result = create_stability_analysis_result(
|
105
105
|
dataset.y_pred(model),
|
106
106
|
model.predict(perturbed_df),
|
107
107
|
mean_similarity_threshold,
|
108
108
|
)
|
109
|
+
|
110
|
+
return result, RawData(original_perturbed_similarity=raw_data)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from transformers import MarianMTModel, MarianTokenizer
|
6
6
|
|
7
|
-
from validmind import tags, tasks
|
7
|
+
from validmind import RawData, tags, tasks
|
8
8
|
from validmind.logging import get_logger
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
@@ -128,8 +128,10 @@ def StabilityAnalysisTranslation(
|
|
128
128
|
perturb_data
|
129
129
|
)
|
130
130
|
|
131
|
-
|
131
|
+
raw_data, result = create_stability_analysis_result(
|
132
132
|
dataset.y_pred(model),
|
133
133
|
model.predict(perturbed_df),
|
134
134
|
mean_similarity_threshold,
|
135
135
|
)
|
136
|
+
|
137
|
+
return result, RawData(original_perturbed_similarity=raw_data)
|
@@ -10,7 +10,7 @@ import plotly.express as px
|
|
10
10
|
from sklearn.manifold import TSNE
|
11
11
|
from sklearn.preprocessing import StandardScaler
|
12
12
|
|
13
|
-
from validmind import tags, tasks
|
13
|
+
from validmind import RawData, tags, tasks
|
14
14
|
|
15
15
|
|
16
16
|
@tags("visualization", "dimensionality_reduction", "embeddings")
|
@@ -80,7 +80,7 @@ def TSNEComponentsPairwisePlots(
|
|
80
80
|
)
|
81
81
|
|
82
82
|
# List to store each plot
|
83
|
-
|
83
|
+
figures = []
|
84
84
|
|
85
85
|
# Create plots for each pair of t-SNE components (if n_components > 1)
|
86
86
|
if n_components > 1:
|
@@ -95,7 +95,7 @@ def TSNEComponentsPairwisePlots(
|
|
95
95
|
f"Component {comp2}": f"Component {comp2}",
|
96
96
|
},
|
97
97
|
)
|
98
|
-
|
98
|
+
figures.append(fig)
|
99
99
|
else:
|
100
100
|
fig = px.scatter(
|
101
101
|
tsne_df,
|
@@ -106,7 +106,9 @@ def TSNEComponentsPairwisePlots(
|
|
106
106
|
"Component 1": "Component 1",
|
107
107
|
},
|
108
108
|
)
|
109
|
-
|
109
|
+
figures.append(fig)
|
110
110
|
|
111
|
-
|
112
|
-
|
111
|
+
return (
|
112
|
+
*figures,
|
113
|
+
RawData(embeddings_scaled=embeddings_scaled, tsne_results=tsne_results),
|
114
|
+
)
|
@@ -3,6 +3,7 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import numpy as np
|
6
|
+
import pandas as pd
|
6
7
|
import plotly.express as px
|
7
8
|
from sklearn.metrics.pairwise import cosine_similarity
|
8
9
|
|
@@ -17,10 +18,19 @@ def create_stability_analysis_result(
|
|
17
18
|
original_embeddings, perturbed_embeddings
|
18
19
|
).diagonal()
|
19
20
|
|
21
|
+
# create a raw dataframe of the original, perturbed and similarity
|
22
|
+
raw_data = pd.DataFrame(
|
23
|
+
{
|
24
|
+
"original": original_embeddings,
|
25
|
+
"perturbed": perturbed_embeddings,
|
26
|
+
"similarity": similarities,
|
27
|
+
}
|
28
|
+
)
|
29
|
+
|
20
30
|
mean = np.mean(similarities)
|
21
31
|
passed = mean > mean_similarity_threshold
|
22
32
|
|
23
|
-
return (
|
33
|
+
return raw_data, (
|
24
34
|
[
|
25
35
|
{
|
26
36
|
"Mean Similarity": mean,
|
@@ -7,7 +7,7 @@ import warnings
|
|
7
7
|
import plotly.express as px
|
8
8
|
from datasets import Dataset
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
|
13
13
|
from .utils import get_ragas_config, get_renamed_columns
|
@@ -144,4 +144,5 @@ def AnswerCorrectness(
|
|
144
144
|
},
|
145
145
|
fig_histogram,
|
146
146
|
fig_box,
|
147
|
+
RawData(evaluation_results=result_df),
|
147
148
|
)
|
@@ -7,7 +7,7 @@ import warnings
|
|
7
7
|
import plotly.express as px
|
8
8
|
from datasets import Dataset
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
|
13
13
|
from .utils import get_ragas_config, get_renamed_columns
|
@@ -187,9 +187,13 @@ def AspectCritic(
|
|
187
187
|
title="Aspect Critique Results",
|
188
188
|
)
|
189
189
|
|
190
|
-
return
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
190
|
+
return (
|
191
|
+
{
|
192
|
+
"Aspect Scores": [
|
193
|
+
{"Aspect": aspect, "Score": result_df[aspect].mean()}
|
194
|
+
for aspect in aspects + [aspect.name for aspect in custom_aspects]
|
195
|
+
]
|
196
|
+
},
|
197
|
+
fig,
|
198
|
+
RawData(evaluation_results=result_df),
|
199
|
+
)
|
@@ -7,7 +7,7 @@ import warnings
|
|
7
7
|
import plotly.express as px
|
8
8
|
from datasets import Dataset
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
|
13
13
|
from .utils import get_ragas_config, get_renamed_columns
|
@@ -143,4 +143,5 @@ def ContextEntityRecall(
|
|
143
143
|
},
|
144
144
|
fig_histogram,
|
145
145
|
fig_box,
|
146
|
+
RawData(evaluation_results=result_df),
|
146
147
|
)
|
@@ -7,7 +7,7 @@ import warnings
|
|
7
7
|
import plotly.express as px
|
8
8
|
from datasets import Dataset
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
|
13
13
|
from .utils import get_ragas_config, get_renamed_columns
|
@@ -135,4 +135,5 @@ def ContextPrecision(
|
|
135
135
|
},
|
136
136
|
fig_histogram,
|
137
137
|
fig_box,
|
138
|
+
RawData(evaluation_results=result_df),
|
138
139
|
)
|
@@ -7,7 +7,7 @@ import warnings
|
|
7
7
|
import plotly.express as px
|
8
8
|
from datasets import Dataset
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
|
13
13
|
from .utils import get_ragas_config, get_renamed_columns
|
@@ -130,4 +130,5 @@ def ContextPrecisionWithoutReference(
|
|
130
130
|
},
|
131
131
|
fig_histogram,
|
132
132
|
fig_box,
|
133
|
+
RawData(evaluation_results=result_df),
|
133
134
|
)
|
@@ -7,7 +7,7 @@ import warnings
|
|
7
7
|
import plotly.express as px
|
8
8
|
from datasets import Dataset
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
|
13
13
|
from .utils import get_ragas_config, get_renamed_columns
|
@@ -135,4 +135,5 @@ def ContextRecall(
|
|
135
135
|
},
|
136
136
|
fig_histogram,
|
137
137
|
fig_box,
|
138
|
+
RawData(evaluation_results=result_df),
|
138
139
|
)
|