validmind-2.7.6-py3-none-any.whl → validmind-2.7.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +2 -0
- validmind/__version__.py +1 -1
- validmind/api_client.py +8 -1
- validmind/datasets/credit_risk/lending_club.py +3 -4
- validmind/html_templates/content_blocks.py +1 -1
- validmind/tests/__types__.py +17 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
- validmind/tests/data_validation/AutoMA.py +2 -2
- validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
- validmind/tests/data_validation/BoxPierce.py +2 -2
- validmind/tests/data_validation/ClassImbalance.py +2 -1
- validmind/tests/data_validation/DatasetDescription.py +11 -2
- validmind/tests/data_validation/DatasetSplit.py +2 -2
- validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
- validmind/tests/data_validation/HighCardinality.py +9 -2
- validmind/tests/data_validation/HighPearsonCorrelation.py +6 -2
- validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
- validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
- validmind/tests/data_validation/MutualInformation.py +6 -8
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
- validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
- validmind/tests/data_validation/RollingStatsPlot.py +21 -10
- validmind/tests/data_validation/ScatterPlot.py +3 -5
- validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
- validmind/tests/data_validation/SeasonalDecompose.py +12 -2
- validmind/tests/data_validation/Skewness.py +6 -3
- validmind/tests/data_validation/SpreadPlot.py +8 -3
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
- validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
- validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
- validmind/tests/data_validation/WOEBinPlots.py +2 -2
- validmind/tests/data_validation/WOEBinTable.py +11 -9
- validmind/tests/data_validation/nlp/CommonWords.py +2 -2
- validmind/tests/data_validation/nlp/Hashtags.py +2 -2
- validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
- validmind/tests/data_validation/nlp/Mentions.py +9 -6
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
- validmind/tests/data_validation/nlp/Punctuations.py +4 -2
- validmind/tests/data_validation/nlp/Sentiment.py +2 -2
- validmind/tests/data_validation/nlp/StopWords.py +5 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -2
- validmind/tests/data_validation/nlp/Toxicity.py +2 -2
- validmind/tests/model_validation/BertScore.py +2 -2
- validmind/tests/model_validation/BleuScore.py +2 -2
- validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
- validmind/tests/model_validation/ContextualRecall.py +2 -2
- validmind/tests/model_validation/FeaturesAUC.py +2 -2
- validmind/tests/model_validation/MeteorScore.py +2 -2
- validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
- validmind/tests/model_validation/RegardScore.py +6 -2
- validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
- validmind/tests/model_validation/RougeScore.py +6 -5
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
- validmind/tests/model_validation/TokenDisparity.py +2 -2
- validmind/tests/model_validation/ToxicityScore.py +10 -2
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
- validmind/tests/model_validation/embeddings/utils.py +11 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
- validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
- validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
- validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
- validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +3 -1
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +3 -1
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +3 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +3 -1
- validmind/tests/ongoing_monitoring/FeatureDrift.py +1 -0
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +1 -0
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +3 -1
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +1 -0
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +3 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +3 -1
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -3
- validmind/tests/prompt_validation/Bias.py +13 -9
- validmind/tests/prompt_validation/Clarity.py +13 -9
- validmind/tests/prompt_validation/Conciseness.py +13 -9
- validmind/tests/prompt_validation/Delimitation.py +13 -9
- validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
- validmind/tests/prompt_validation/Robustness.py +6 -2
- validmind/tests/prompt_validation/Specificity.py +13 -9
- validmind/tests/run.py +6 -0
- validmind/utils.py +7 -8
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/METADATA +1 -2
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/RECORD +147 -147
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/WHEEL +1 -1
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/LICENSE +0 -0
- {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/entry_points.txt +0 -0
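Almost every hunk that follows applies the same pattern: the test module adds RawData to its validmind imports and returns a RawData object alongside the figures and tables it already produced, so the intermediate values behind each result are preserved. A minimal sketch of that pattern is shown below; the test name, variable names, and keyword arguments are illustrative only and are not part of this diff.

import numpy as np
import plotly.express as px

from validmind import RawData, tags


@tags("visualization")
def ExampleDistribution(dataset, model):
    # Hypothetical test for illustration: plot the prediction distribution and
    # also return the underlying values as a RawData object, mirroring the
    # return-value pattern introduced across the tests below.
    values = np.asarray(dataset.y_pred(model)).flatten()
    fig = px.histogram(x=values, nbins=50, title="Prediction Distribution")
    return fig, RawData(values=values)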
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py

@@ -7,7 +7,7 @@ import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import norm
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_predictions", "visualization")
@@ -144,4 +144,13 @@ def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
         template="plotly_white",
     )
 
-    return
+    return (
+        fig,
+        breaches_df,
+        RawData(
+            errors=errors,
+            z_score=z_score,
+            lower_confidence=lower_conf,
+            upper_confidence=upper_conf,
+        ),
+    )

validmind/tests/model_validation/TokenDisparity.py

@@ -5,7 +5,7 @@
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("nlp", "text_data", "visualization")
@@ -108,4 +108,4 @@ def TokenDisparity(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (result_df, *
+    return (result_df, *figures, RawData(token_counts_df=df))

validmind/tests/model_validation/ToxicityScore.py

@@ -6,7 +6,7 @@ import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("nlp", "text_data", "visualization")
@@ -139,4 +139,12 @@ def ToxicityScore(dataset, model):
         ]
     ]
 
-    return (
+    return (
+        result_df,
+        *tuple(figures),
+        RawData(
+            input_toxicity_df=input_df,
+            true_toxicity_df=true_df,
+            pred_toxicity_df=pred_df,
+        ),
+    )

validmind/tests/model_validation/embeddings/ClusterDistribution.py

@@ -5,7 +5,7 @@
 import plotly.express as px
 from sklearn.cluster import KMeans
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -52,8 +52,14 @@ def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int =
     - Uses the KMeans clustering algorithm, which assumes that clusters are convex and isotropic, and may not work as
     intended if the true clusters in the data are not of this shape.
     """
-
-
+    embeddings = dataset.y_pred(model)
+    kmeans = KMeans(n_clusters=num_clusters).fit(embeddings)
+    labels = kmeans.labels_
+
+    fig = px.histogram(
+        labels,
         nbins=num_clusters,
         title="Embeddings Cluster Distribution",
     )
+
+    return fig, RawData(labels=labels)

validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py

@@ -9,7 +9,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -63,6 +63,7 @@ def CosineSimilarityComparison(dataset, models):
     figures = []
     # Initialize a list to store data for the DataFrame
     all_stats = []
+    similarity_matrices = []
 
     # Generate all pairs of models for comparison
     for model_A, model_B in combinations(models, 2):
@@ -73,6 +74,15 @@ def CosineSimilarityComparison(dataset, models):
         similarity_matrix = cosine_similarity(embeddings_A, embeddings_B)
         similarities = similarity_matrix.flatten()
 
+        # store similarity matrix
+        similarity_matrices.append(
+            {
+                "model_A": model_A.input_id,
+                "model_B": model_B.input_id,
+                "similarity_matrix": similarity_matrix,
+            }
+        )
+
         # Generate statistics and add model combination as a column
         stats_data = {
             "Combination": f"{model_A.input_id} vs {model_B.input_id}",
@@ -100,4 +110,8 @@ def CosineSimilarityComparison(dataset, models):
     # Create a DataFrame from all collected statistics
     stats_df = pd.DataFrame(all_stats)
 
-    return (
+    return (
+        *figures,
+        stats_df,
+        RawData(similarity_matrices=pd.DataFrame(similarity_matrices)),
+    )

validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py

@@ -5,7 +5,7 @@
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -52,9 +52,11 @@ def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
     - The output is sensitive to the choice of bin number for the histogram. Different bin numbers could give a
     slightly altered perspective on the distribution of cosine similarity.
     """
+    similarity_scores = cosine_similarity(dataset.y_pred(model)).flatten()
+
     return px.histogram(
-        x=
+        x=similarity_scores,
         nbins=100,
         title="Cosine Similarity Distribution",
         labels={"x": "Cosine Similarity"},
-    )
+    ), RawData(similarity_scores=similarity_scores)

validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py

@@ -6,7 +6,7 @@ import numpy as np
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -81,4 +81,4 @@ def CosineSimilarityHeatmap(
         yaxis_title=yaxis_title,
     )
 
-    return fig
+    return fig, RawData(similarity_matrix=similarity_matrix)

validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py

@@ -5,7 +5,7 @@
 import numpy as np
 import plotly.express as px
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -55,17 +55,27 @@ def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
     - While it displays valuable information about the central tendency and spread of data, it does not provide
     information about correlations between different embedding dimensions.
     """
+    y_pred = dataset.y_pred(model)
+    embedding_means = np.mean(y_pred, axis=0)
+    embedding_medians = np.median(y_pred, axis=0)
+    embedding_stds = np.std(y_pred, axis=0)
+
     return (
         px.histogram(
-            x=
+            x=embedding_means,
             title="Distribution of Embedding Means",
         ),
         px.histogram(
-            x=
+            x=embedding_medians,
             title="Distribution of Embedding Medians",
         ),
         px.histogram(
-            x=
+            x=embedding_stds,
             title="Distribution of Embedding Standard Deviations",
         ),
+        RawData(
+            embedding_means=embedding_means,
+            embedding_medians=embedding_medians,
+            embedding_stds=embedding_stds,
+        ),
     )

validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py

@@ -7,7 +7,7 @@ from typing import Union
 import plotly.express as px
 from sklearn.manifold import TSNE
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -89,4 +89,4 @@ def EmbeddingsVisualization2D(
     fig = px.scatter(**scatter_kwargs)
     fig.update_layout(width=500, height=500)
 
-    return fig
+    return fig, RawData(tsne_embeddings=reduced_embeddings)

validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py

@@ -9,7 +9,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import euclidean_distances
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -57,6 +57,8 @@ def EuclideanDistanceComparison(dataset, models):
     figures = []
     all_stats = []
 
+    distance_matrices = {}
+
     # Generate all pairs of models for comparison
     for model_A, model_B in combinations(models, 2):
         embeddings_A = np.stack(dataset.y_pred(model_A))
@@ -66,6 +68,15 @@ def EuclideanDistanceComparison(dataset, models):
         distance_matrix = euclidean_distances(embeddings_A, embeddings_B)
         distances = distance_matrix.flatten()
 
+        # Store raw distance matrix for each pair-wise comparison
+        distance_matrices.append(
+            {
+                "model_A": model_A.input_id,
+                "model_B": model_B.input_id,
+                "distance_matrix": distance_matrix,
+            }
+        )
+
         # Generate statistics and add model combination as a column
         stats_data = {
             "Combination": f"{model_A.input_id} vs {model_B.input_id}",
@@ -93,4 +104,7 @@ def EuclideanDistanceComparison(dataset, models):
     # Create a DataFrame from all collected statistics
     stats_df = pd.DataFrame(all_stats)
 
-
+    # Add raw data to return
+    raw_data = RawData(distance_matrices=pd.DataFrame(distance_matrices))
+
+    return (stats_df, *figures, raw_data)

validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py

@@ -6,7 +6,7 @@ import numpy as np
 import plotly.express as px
 from sklearn.metrics.pairwise import euclidean_distances
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -79,4 +79,4 @@ def EuclideanDistanceHeatmap(
         yaxis_title=yaxis_title,
     )
 
-    return fig
+    return fig, RawData(distance_matrix=distance_matrix)

validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py

@@ -10,7 +10,7 @@ import plotly.express as px
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -74,7 +74,7 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
     )
 
     # List to store each plot
-
+    figures = []
 
     # Create plots for each pair of principal components
     for pc1, pc2 in itertools.combinations(range(1, n_components + 1), 2):
@@ -88,7 +88,6 @@ def PCAComponentsPairwisePlots(dataset, model, n_components=3):
                 f"PC{pc2}": f"Principal Component {pc2}",
             },
         )
-
+        figures.append(fig)
 
-
-    return tuple(plots)
+    return (*figures, RawData(pca_results=pca_df))

validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py

@@ -5,7 +5,7 @@
 import re
 from typing import Dict
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .utils import create_stability_analysis_result
@@ -91,8 +91,10 @@ def StabilityAnalysisKeyword(
         perturb_data
     )
 
-
+    raw_data, results = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return results, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py

@@ -5,7 +5,7 @@
 import random
 import string
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .utils import create_stability_analysis_result
@@ -145,8 +145,10 @@ def StabilityAnalysisRandomNoise(
         perturb_data
     )
 
-
+    raw_data, result = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return result, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py

@@ -7,7 +7,7 @@ import random
 import nltk
 from nltk.corpus import wordnet as wn
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 from .utils import create_stability_analysis_result
@@ -101,8 +101,10 @@ def StabilityAnalysisSynonyms(
         perturb_data
     )
 
-
+    raw_data, result = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return result, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py

@@ -4,7 +4,7 @@
 
 from transformers import MarianMTModel, MarianTokenizer
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.logging import get_logger
 from validmind.vm_models import VMDataset, VMModel
 
@@ -128,8 +128,10 @@ def StabilityAnalysisTranslation(
         perturb_data
     )
 
-
+    raw_data, result = create_stability_analysis_result(
         dataset.y_pred(model),
         model.predict(perturbed_df),
         mean_similarity_threshold,
     )
+
+    return result, RawData(original_perturbed_similarity=raw_data)

validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py

@@ -10,7 +10,7 @@ import plotly.express as px
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
@@ -80,7 +80,7 @@ def TSNEComponentsPairwisePlots(
     )
 
     # List to store each plot
-
+    figures = []
 
     # Create plots for each pair of t-SNE components (if n_components > 1)
     if n_components > 1:
@@ -95,7 +95,7 @@ def TSNEComponentsPairwisePlots(
                     f"Component {comp2}": f"Component {comp2}",
                 },
             )
-
+            figures.append(fig)
     else:
         fig = px.scatter(
             tsne_df,
@@ -106,7 +106,9 @@ def TSNEComponentsPairwisePlots(
                 "Component 1": "Component 1",
             },
         )
-
+        figures.append(fig)
 
-
-
+    return (
+        *figures,
+        RawData(embeddings_scaled=embeddings_scaled, tsne_results=tsne_results),
+    )

validmind/tests/model_validation/embeddings/utils.py

@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import numpy as np
+import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -17,10 +18,19 @@ def create_stability_analysis_result(
         original_embeddings, perturbed_embeddings
     ).diagonal()
 
+    # create a raw dataframe of the original, perturbed and similarity
+    raw_data = pd.DataFrame(
+        {
+            "original": original_embeddings,
+            "perturbed": perturbed_embeddings,
+            "similarity": similarities,
+        }
+    )
+
     mean = np.mean(similarities)
     passed = mean > mean_similarity_threshold
 
-    return (
+    return raw_data, (
         [
             {
                 "Mean Similarity": mean,

validmind/tests/model_validation/ragas/AnswerCorrectness.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -144,4 +144,5 @@ def AnswerCorrectness(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/AspectCritic.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -187,9 +187,13 @@ def AspectCritic(
         title="Aspect Critique Results",
     )
 
-    return
-
-
-
-
-
+    return (
+        {
+            "Aspect Scores": [
+                {"Aspect": aspect, "Score": result_df[aspect].mean()}
+                for aspect in aspects + [aspect.name for aspect in custom_aspects]
+            ]
+        },
+        fig,
+        RawData(evaluation_results=result_df),
+    )

validmind/tests/model_validation/ragas/ContextEntityRecall.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -143,4 +143,5 @@ def ContextEntityRecall(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ContextPrecision.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -135,4 +135,5 @@ def ContextPrecision(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -130,4 +130,5 @@ def ContextPrecisionWithoutReference(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ContextRecall.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -135,4 +135,5 @@ def ContextRecall(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/Faithfulness.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -140,4 +140,5 @@ def Faithfulness(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/NoiseSensitivity.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -179,4 +179,5 @@ def NoiseSensitivity(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/ResponseRelevancy.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -154,4 +154,5 @@ def ResponseRelevancy(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/ragas/SemanticSimilarity.py

@@ -7,7 +7,7 @@ import warnings
 import plotly.express as px
 from datasets import Dataset
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
 
 from .utils import get_ragas_config, get_renamed_columns
@@ -133,4 +133,5 @@ def SemanticSimilarity(
         },
         fig_histogram,
         fig_box,
+        RawData(evaluation_results=result_df),
     )

validmind/tests/model_validation/sklearn/CalibrationCurve.py

@@ -2,10 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from sklearn.calibration import calibration_curve
 import plotly.graph_objects as go
+from sklearn.calibration import calibration_curve
+
 from validmind import tags, tasks
-from validmind.vm_models import
+from validmind.vm_models import VMDataset, VMModel
 from validmind.vm_models.result import RawData
 
 

validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py

@@ -6,11 +6,8 @@ import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from sklearn.metrics import
-
-    precision_recall_curve,
-    confusion_matrix,
-)
+from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_curve
+
 from validmind import tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 