validmind-2.8.28-py3-none-any.whl → validmind-2.8.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/models/function.py +11 -3
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +145 -38
- {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
- {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/RECORD +204 -204
- {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
- {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
- {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py

@@ -2,24 +2,28 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def CosineSimilarityHeatmap(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     title="Cosine Similarity Matrix",
     color="Cosine Similarity",
     xaxis_title="Index",
     yaxis_title="Index",
     color_scale="Blues",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Generates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model.
 
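
The hunk above illustrates the change repeated across most files in this release: `typing` and `plotly.graph_objects` imports are added, bare `dataset` and `model` parameters gain `VMDataset`/`VMModel` annotations, and each test function declares an explicit `Tuple[...]` return type that includes `RawData`. A minimal sketch of that annotated shape, assuming embeddings are read via `dataset.y_pred(model)` and that `RawData` accepts keyword arguments; the function name and body are illustrative, not taken from the package:

```python
from typing import Tuple

import numpy as np
import plotly.graph_objects as go
from sklearn.metrics.pairwise import cosine_similarity

from validmind import RawData, tags, tasks
from validmind.vm_models import VMDataset, VMModel


@tags("visualization", "embeddings")
@tasks("text_qa")
def ExampleSimilarityHeatmap(
    dataset: VMDataset, model: VMModel
) -> Tuple[go.Figure, RawData]:
    # Assumption: the model's embeddings are stored as its predictions on the dataset.
    embeddings = np.stack(dataset.y_pred(model))
    # Pairwise cosine similarity between all rows of the embedding matrix.
    similarity = cosine_similarity(embeddings)
    # Return a heatmap figure plus the raw similarity matrix for later inspection.
    fig = go.Figure(data=go.Heatmap(z=similarity, colorscale="Blues"))
    return fig, RawData(similarity_matrix=similarity)
```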

validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py

@@ -2,8 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def DescriptiveAnalytics(
+def DescriptiveAnalytics(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, go.Figure, go.Figure, RawData]:
     """
     Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation
     histograms.

validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py

@@ -2,9 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import Union
+from typing import Tuple, Union
 
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.manifold import TSNE
 
 from validmind import RawData, tags, tasks
@@ -17,11 +18,11 @@ logger = get_logger(__name__)
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
 def EmbeddingsVisualization2D(
-    model: VMModel,
     dataset: VMDataset,
+    model: VMModel,
     cluster_column: Union[str, None] = None,
     perplexity: int = 30,
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.
 

validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py

@@ -3,18 +3,23 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 from itertools import combinations
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import euclidean_distances
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def EuclideanDistanceComparison(
+def EuclideanDistanceComparison(
+    dataset: VMDataset, models: List[VMModel]
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights
     into model behavior and potential redundancy or diversity.

validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py

@@ -2,24 +2,28 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import euclidean_distances
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def EuclideanDistanceHeatmap(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     title="Euclidean Distance Matrix",
     color="Euclidean Distance",
     xaxis_title="Index",
     yaxis_title="Index",
     color_scale="Blues",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Generates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model.
 

validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py

@@ -3,19 +3,24 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def PCAComponentsPairwisePlots(
+def PCAComponentsPairwisePlots(
+    dataset: VMDataset, model: VMModel, n_components: int = 3
+) -> Tuple[go.Figure, RawData]:
     """
     Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model
     embeddings.

validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py

@@ -3,7 +3,10 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import re
-from typing import Dict
+from typing import Dict, Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -18,7 +21,7 @@ def StabilityAnalysisKeyword(
     model: VMModel,
     keyword_dict: Dict[str, str],
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates robustness of embedding models to keyword swaps in the test dataset.
 

validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py

@@ -4,6 +4,10 @@
 
 import random
 import string
+from typing import Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -69,7 +73,7 @@ def StabilityAnalysisRandomNoise(
     model: VMModel,
     probability: float = 0.02,
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Assesses the robustness of text embeddings models to random noise introduced via text perturbations.
 

validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py

@@ -3,8 +3,11 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import random
+from typing import Tuple
 
 import nltk
+import pandas as pd
+import plotly.graph_objects as go
 from nltk.corpus import wordnet as wn
 
 from validmind import RawData, tags, tasks
@@ -20,7 +23,7 @@ def StabilityAnalysisSynonyms(
     model: VMModel,
     probability: float = 0.02,
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly.
 

validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py

@@ -2,6 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 from transformers import MarianMTModel, MarianTokenizer
 
 from validmind import RawData, tags, tasks
@@ -21,7 +25,7 @@ def StabilityAnalysisTranslation(
     source_lang: str = "en",
     target_lang: str = "fr",
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates robustness of text embeddings models to noise introduced by translating the original text to another
     language and back.

validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py

@@ -3,25 +3,28 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def TSNEComponentsPairwisePlots(
-    dataset,
-    model,
-    n_components=2,
-    perplexity=30,
-    title="t-SNE",
-):
+    dataset: VMDataset,
+    model: VMModel,
+    n_components: int = 2,
+    perplexity: int = 30,
+    title: str = "t-SNE",
+) -> Tuple[go.Figure, RawData]:
     """
     Creates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential
     clustering structures.

validmind/tests/model_validation/ragas/AnswerCorrectness.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm")
 @tasks("text_qa", "text_generation", "text_summarization")
 def AnswerCorrectness(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    reference_column="reference",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the correctness of answers in a dataset with respect to the provided ground
     truths and visualizes the results in a histogram.

validmind/tests/model_validation/ragas/AspectCritic.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, List, Optional, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -39,21 +42,21 @@ LOWER_IS_BETTER_ASPECTS = ["harmfulness", "maliciousness"]
 @tags("ragas", "llm", "qualitative")
 @tasks("text_summarization", "text_generation", "text_qa")
 def AspectCritic(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    retrieved_contexts_column=None,
-    aspects:
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    retrieved_contexts_column: Optional[str] = None,
+    aspects: List[str] = [
         "coherence",
         "conciseness",
         "correctness",
         "harmfulness",
         "maliciousness",
     ],
-    additional_aspects:
+    additional_aspects: Optional[List[Tuple[str, str]]] = None,
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, RawData]:
     """
     Evaluates generations against the following aspects: harmfulness, maliciousness,
     coherence, correctness, and conciseness.
@@ -146,8 +149,8 @@ def AspectCritic(
 
     if retrieved_contexts_column:
         required_columns["retrieved_contexts"] = retrieved_contexts_column
-
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     custom_aspects = (
         [
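
Beyond the signature annotations, the ragas tests in this release add a `df = df[required_columns.keys()]` line after the column-renaming step, so only the columns the metric actually needs reach evaluation. A standalone pandas sketch of the effect; the column names and the extra `model_score` column are hypothetical:

```python
import pandas as pd

# Hypothetical renamed evaluation frame that carries an extra column the metric does not need.
df = pd.DataFrame(
    {
        "user_input": ["What is the capital of France?"],
        "retrieved_contexts": [["Paris is the capital of France."]],
        "reference": ["Paris"],
        "model_score": [0.93],  # extra column that should not be passed to ragas
    }
)

# Mirrors the mapping the tests build: metric column name -> dataset column name.
required_columns = {
    "user_input": "user_input",
    "retrieved_contexts": "retrieved_contexts",
    "reference": "reference",
}

# The line added in 2.8.29: keep only the required columns before evaluation.
df = df[required_columns.keys()]
print(df.columns.tolist())  # ['user_input', 'retrieved_contexts', 'reference']
```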

validmind/tests/model_validation/ragas/ContextEntityRecall.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,12 +33,12 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def ContextEntityRecall(
-    dataset,
+    dataset: VMDataset,
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the context entity recall for dataset entries and visualizes the results.
 

validmind/tests/model_validation/ragas/ContextPrecision.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextPrecision(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context Precision is a metric that evaluates whether all of the ground-truth
     relevant items present in the contexts are ranked higher or not. Ideally all the

validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextPrecisionWithoutReference(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     response_column: str = "response",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context Precision Without Reference is a metric used to evaluate the relevance of
     retrieved contexts compared to the expected response for a given user input. This

validmind/tests/model_validation/ragas/ContextRecall.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextRecall(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context recall measures the extent to which the retrieved context aligns with the
     annotated answer, treated as the ground truth. It is computed based on the `ground
@@ -109,6 +112,7 @@ def ContextRecall(
     }
 
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     result_df = evaluate(
         Dataset.from_pandas(df),

validmind/tests/model_validation/ragas/Faithfulness.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "rag_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def Faithfulness(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    retrieved_contexts_column="retrieved_contexts",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    retrieved_contexts_column: str = "retrieved_contexts",
    judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the faithfulness of the generated answers with respect to retrieved contexts.
 
@@ -115,6 +118,7 @@ def Faithfulness(
 
     df = get_renamed_columns(dataset._df, required_columns)
 
+    df = df[required_columns.keys()]
     result_df = evaluate(
         Dataset.from_pandas(df),
         metrics=[faithfulness()],

validmind/tests/model_validation/ragas/NoiseSensitivity.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -32,15 +35,15 @@ VALID_FOCUS_VALUES = ["relevant", "irrelevant"]
 @tags("ragas", "llm", "rag_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def NoiseSensitivity(
-    dataset,
-    response_column="response",
-    retrieved_contexts_column="retrieved_contexts",
-    reference_column="reference",
-    focus="relevant",
-    user_input_column="user_input",
+    dataset: VMDataset,
+    response_column: str = "response",
+    retrieved_contexts_column: str = "retrieved_contexts",
+    reference_column: str = "reference",
+    focus: str = "relevant",
+    user_input_column: str = "user_input",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Assesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it
     generates incorrect responses.

validmind/tests/model_validation/ragas/ResponseRelevancy.py

@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "rag_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def ResponseRelevancy(
-    dataset,
-    user_input_column="user_input",
-    retrieved_contexts_column=None,
-    response_column="response",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    retrieved_contexts_column: str = None,
+    response_column: str = "response",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Assesses how pertinent the generated answer is to the given prompt.
 
@@ -124,6 +127,7 @@ def ResponseRelevancy(
         required_columns["retrieved_contexts"] = retrieved_contexts_column
 
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     metrics = [response_relevancy()]
 
@@ -134,7 +138,6 @@ def ResponseRelevancy(
     ).to_pandas()
 
     score_column = "answer_relevancy"
-
     fig_histogram = px.histogram(
         x=result_df[score_column].to_list(), nbins=10, title="Response Relevancy"
     )