validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +4 -24
- validmind/api_client.py +6 -17
- validmind/logging.py +48 -0
- validmind/models/function.py +11 -3
- validmind/tests/__init__.py +2 -0
- validmind/tests/__types__.py +18 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/output.py +9 -2
- validmind/tests/plots/BoxPlot.py +260 -0
- validmind/tests/plots/CorrelationHeatmap.py +235 -0
- validmind/tests/plots/HistogramPlot.py +233 -0
- validmind/tests/plots/ViolinPlot.py +125 -0
- validmind/tests/plots/__init__.py +0 -0
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/tests/stats/CorrelationAnalysis.py +251 -0
- validmind/tests/stats/DescriptiveStats.py +197 -0
- validmind/tests/stats/NormalityTests.py +147 -0
- validmind/tests/stats/OutlierDetection.py +173 -0
- validmind/tests/stats/__init__.py +0 -0
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
- validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
- validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
- validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
- validmind/unit_metrics/classification/individual/Confidence.py +52 -0
- validmind/unit_metrics/classification/individual/Correctness.py +41 -0
- validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
- validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
- validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
- validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
- validmind/unit_metrics/classification/individual/__init__.py +0 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +291 -38
- validmind/vm_models/result/result.py +26 -4
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/RECORD +233 -212
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
```diff
--- a/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py
+++ b/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py
@@ -2,17 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
 from scipy.stats import norm
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_predictions", "visualization")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesPredictionWithCI(
+def TimeSeriesPredictionWithCI(
+    dataset: VMDataset,
+    model: VMModel,
+    confidence: float = 0.95,
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses predictive accuracy and uncertainty in time series models, highlighting breaches beyond confidence
     intervals.
```
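This is the pattern repeated across most of the test modules below: input arguments gain `VMDataset`/`VMModel` annotations, and the return type is spelled out as a tuple that includes a `RawData` capture. A minimal sketch of exercising the re-annotated test through the standard test harness (the wrapper function and its arguments are illustrative, not part of this diff):

```python
import validmind as vm
from validmind.vm_models import VMDataset, VMModel


def run_ci_test(dataset: VMDataset, model: VMModel):
    # Run the test by ID through the harness; `inputs` supplies the two
    # annotated arguments and `params` overrides the typed default.
    return vm.tests.run_test(
        "validmind.model_validation.TimeSeriesPredictionWithCI",
        inputs={"dataset": dataset, "model": model},
        params={"confidence": 0.95},
    )
```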
```diff
--- a/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py
+++ b/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py
@@ -2,14 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_predictions", "visualization")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesPredictionsPlot(
+def TimeSeriesPredictionsPlot(
+    dataset: VMDataset,
+    model: VMModel,
+) -> Tuple[go.Figure, RawData]:
     """
     Plot actual vs predicted values for time series data and generate a visual comparison for the model.
 
```
```diff
--- a/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py
+++ b/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py
@@ -2,17 +2,22 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Optional, Tuple
 
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn import metrics
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("model_performance", "sklearn")
 @tasks("regression", "time_series_forecasting")
-def TimeSeriesR2SquareBySegments(
+def TimeSeriesR2SquareBySegments(
+    dataset: VMDataset, model: VMModel, segments: Optional[int] = None
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the R-Squared values of regression models over specified time segments in time series data to assess
     segment-wise model performance.
```
```diff
--- a/validmind/tests/model_validation/TokenDisparity.py
+++ b/validmind/tests/model_validation/TokenDisparity.py
@@ -2,15 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def TokenDisparity(
+def TokenDisparity(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Evaluates the token disparity between reference and generated texts, visualizing the results through histograms and
     bar charts, alongside compiling a comprehensive table of descriptive statistics for token counts.
```
```diff
--- a/validmind/tests/model_validation/ToxicityScore.py
+++ b/validmind/tests/model_validation/ToxicityScore.py
@@ -2,16 +2,21 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import evaluate
 import pandas as pd
 import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("nlp", "text_data", "visualization")
 @tasks("text_classification", "text_summarization")
-def ToxicityScore(
+def ToxicityScore(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses the toxicity levels of texts generated by NLP models to identify and mitigate harmful or offensive content.
 
```
```diff
--- a/validmind/tests/model_validation/embeddings/ClusterDistribution.py
+++ b/validmind/tests/model_validation/embeddings/ClusterDistribution.py
@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.cluster import KMeans
 
 from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def ClusterDistribution(
+def ClusterDistribution(
+    model: VMModel, dataset: VMDataset, num_clusters: int = 5
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.
 
```
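Note that `ClusterDistribution` keeps `model` ahead of `dataset`, unlike most of the signatures in this release. A sketch of a direct call that unpacks the now-declared `Tuple[go.Figure, RawData]` return (assumes an initialized embeddings model and dataset):

```python
from validmind.tests.model_validation.embeddings.ClusterDistribution import (
    ClusterDistribution,
)


def summarize_clusters(model, dataset):
    # Keyword arguments avoid any ambiguity about the model-first order.
    figure, raw_data = ClusterDistribution(
        model=model, dataset=dataset, num_clusters=5
    )
    return figure, raw_data
```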
```diff
--- a/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py
+++ b/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py
@@ -3,18 +3,23 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 from itertools import combinations
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def CosineSimilarityComparison(
+def CosineSimilarityComparison(
+    dataset: VMDataset, models: List[VMModel]
+) -> Tuple[go.Figure, RawData, pd.DataFrame]:
     """
     Assesses the similarity between embeddings generated by different models using Cosine Similarity, providing both
     statistical and visual insights.
```
```diff
--- a/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py
+++ b/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py
@@ -2,7 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def CosineSimilarityDistribution(
+def CosineSimilarityDistribution(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, RawData]:
     """
     Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution
     histogram.
```
```diff
--- a/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py
+++ b/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py
@@ -2,24 +2,28 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import cosine_similarity
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def CosineSimilarityHeatmap(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     title="Cosine Similarity Matrix",
     color="Cosine Similarity",
     xaxis_title="Index",
     yaxis_title="Index",
     color_scale="Blues",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Generates an interactive heatmap to visualize the cosine similarities among embeddings derived from a given model.
 
```
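Only `dataset` and `model` gain annotations here; the cosmetic keyword arguments (`title`, `color`, the axis titles, `color_scale`) are untouched and still overridable. A hedged harness sketch showing how the two kinds of arguments split between `inputs` and `params` (the color-scale override is an example, not a new default):

```python
import validmind as vm


def run_heatmap(dataset, model):
    # Typed inputs go through `inputs`; display options stay in `params`.
    return vm.tests.run_test(
        "validmind.model_validation.embeddings.CosineSimilarityHeatmap",
        inputs={"dataset": dataset, "model": model},
        params={"title": "Cosine Similarity Matrix", "color_scale": "Viridis"},
    )
```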
```diff
--- a/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py
+++ b/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py
@@ -2,8 +2,11 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -11,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
 
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
-def DescriptiveAnalytics(
+def DescriptiveAnalytics(
+    dataset: VMDataset, model: VMModel
+) -> Tuple[go.Figure, go.Figure, go.Figure, RawData]:
     """
     Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation
     histograms.
```
```diff
--- a/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py
+++ b/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py
@@ -2,9 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from typing import Union
+from typing import Tuple, Union
 
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.manifold import TSNE
 
 from validmind import RawData, tags, tasks
@@ -17,11 +18,11 @@ logger = get_logger(__name__)
 @tags("llm", "text_data", "embeddings", "visualization")
 @tasks("feature_extraction")
 def EmbeddingsVisualization2D(
-    model: VMModel,
     dataset: VMDataset,
+    model: VMModel,
     cluster_column: Union[str, None] = None,
     perplexity: int = 30,
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.
 
```
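This file also reorders its parameters: `dataset` now precedes `model`. Any caller passing the two positionally would silently swap them, so keyword arguments are the safe form; a sketch under that assumption:

```python
from validmind.tests.model_validation.embeddings.EmbeddingsVisualization2D import (
    EmbeddingsVisualization2D,
)


def visualize_embeddings(dataset, model):
    # Keywords sidestep the dataset/model reordering introduced in 2.9.1.
    figure, raw_data = EmbeddingsVisualization2D(
        dataset=dataset, model=model, perplexity=30
    )
    return figure, raw_data
```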
```diff
--- a/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py
+++ b/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py
@@ -3,18 +3,23 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 from itertools import combinations
+from typing import List, Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import euclidean_distances
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def EuclideanDistanceComparison(
+def EuclideanDistanceComparison(
+    dataset: VMDataset, models: List[VMModel]
+) -> Tuple[pd.DataFrame, go.Figure, RawData]:
     """
     Assesses and visualizes the dissimilarity between model embeddings using Euclidean distance, providing insights
     into model behavior and potential redundancy or diversity.
```
```diff
--- a/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py
+++ b/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py
@@ -2,24 +2,28 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.metrics.pairwise import euclidean_distances
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def EuclideanDistanceHeatmap(
-    dataset,
-    model,
+    dataset: VMDataset,
+    model: VMModel,
     title="Euclidean Distance Matrix",
     color="Euclidean Distance",
     xaxis_title="Index",
     yaxis_title="Index",
     color_scale="Blues",
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Generates an interactive heatmap to visualize the Euclidean distances among embeddings derived from a given model.
 
```
```diff
--- a/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py
+++ b/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py
@@ -3,19 +3,24 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
-def PCAComponentsPairwisePlots(
+def PCAComponentsPairwisePlots(
+    dataset: VMDataset, model: VMModel, n_components: int = 3
+) -> Tuple[go.Figure, RawData]:
     """
     Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model
     embeddings.
```
```diff
--- a/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py
+++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py
@@ -3,7 +3,10 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import re
-from typing import Dict
+from typing import Dict, Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -18,7 +21,7 @@ def StabilityAnalysisKeyword(
     model: VMModel,
     keyword_dict: Dict[str, str],
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates robustness of embedding models to keyword swaps in the test dataset.
 
```
```diff
--- a/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py
+++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py
@@ -4,6 +4,10 @@
 
 import random
 import string
+from typing import Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
@@ -69,7 +73,7 @@ def StabilityAnalysisRandomNoise(
     model: VMModel,
     probability: float = 0.02,
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Assesses the robustness of text embeddings models to random noise introduced via text perturbations.
 
```
```diff
--- a/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py
+++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py
@@ -3,8 +3,11 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import random
+from typing import Tuple
 
 import nltk
+import pandas as pd
+import plotly.graph_objects as go
 from nltk.corpus import wordnet as wn
 
 from validmind import RawData, tags, tasks
@@ -20,7 +23,7 @@ def StabilityAnalysisSynonyms(
     model: VMModel,
     probability: float = 0.02,
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates the stability of text embeddings models when words in test data are replaced by their synonyms randomly.
 
```
```diff
--- a/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py
+++ b/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py
@@ -2,6 +2,10 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+from typing import Tuple
+
+import pandas as pd
+import plotly.graph_objects as go
 from transformers import MarianMTModel, MarianTokenizer
 
 from validmind import RawData, tags, tasks
@@ -21,7 +25,7 @@ def StabilityAnalysisTranslation(
     source_lang: str = "en",
     target_lang: str = "fr",
     mean_similarity_threshold: float = 0.7,
-):
+) -> Tuple[go.Figure, pd.DataFrame, RawData]:
     """
     Evaluates robustness of text embeddings models to noise introduced by translating the original text to another
     language and back.
```
```diff
--- a/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py
+++ b/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py
@@ -3,25 +3,28 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import itertools
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
 
 from validmind import RawData, tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 @tags("visualization", "dimensionality_reduction", "embeddings")
 @tasks("text_qa", "text_generation", "text_summarization")
 def TSNEComponentsPairwisePlots(
-    dataset,
-    model,
-    n_components=2,
-    perplexity=30,
-    title="t-SNE",
-):
+    dataset: VMDataset,
+    model: VMModel,
+    n_components: int = 2,
+    perplexity: int = 30,
+    title: str = "t-SNE",
+) -> Tuple[go.Figure, RawData]:
     """
     Creates scatter plots for pairwise combinations of t-SNE components to visualize embeddings and highlight potential
     clustering structures.
```
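The t-SNE plot gains the same typed defaults (`n_components: int = 2`, `perplexity: int = 30`). One practical caveat, which comes from scikit-learn rather than this diff: `TSNE` requires `perplexity` to be smaller than the number of samples, so small embedding sets need a lower value. A sketch:

```python
import validmind as vm


def run_tsne_plots(dataset, model, n_rows: int):
    # Clamp the default perplexity (30) for small embedding sets, since
    # scikit-learn's TSNE rejects perplexity >= n_samples.
    return vm.tests.run_test(
        "validmind.model_validation.embeddings.TSNEComponentsPairwisePlots",
        inputs={"dataset": dataset, "model": model},
        params={"n_components": 2, "perplexity": min(30, max(2, n_rows - 1))},
    )
```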
```diff
--- a/validmind/tests/model_validation/ragas/AnswerCorrectness.py
+++ b/validmind/tests/model_validation/ragas/AnswerCorrectness.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm")
 @tasks("text_qa", "text_generation", "text_summarization")
 def AnswerCorrectness(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    reference_column="reference",
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the correctness of answers in a dataset with respect to the provided ground
     truths and visualizes the results in a histogram.
```
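The ragas tests share one pattern: `dataset` becomes a `VMDataset`, the column-name parameters are annotated as `str`, and the declared return bundles the score dict, the figures, and `RawData`; the `judge_llm`/`judge_embeddings` hooks stay untyped. A usage sketch in which the column names are assumptions about the caller's dataset, not defaults from this diff:

```python
import validmind as vm


def score_answer_correctness(dataset):
    # The params remap the test's expected columns onto this dataset's names.
    return vm.tests.run_test(
        "validmind.model_validation.ragas.AnswerCorrectness",
        inputs={"dataset": dataset},
        params={
            "user_input_column": "question",
            "response_column": "answer",
            "reference_column": "ground_truth",
        },
    )
```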
```diff
--- a/validmind/tests/model_validation/ragas/AspectCritic.py
+++ b/validmind/tests/model_validation/ragas/AspectCritic.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, List, Optional, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -39,21 +42,21 @@ LOWER_IS_BETTER_ASPECTS = ["harmfulness", "maliciousness"]
 @tags("ragas", "llm", "qualitative")
 @tasks("text_summarization", "text_generation", "text_qa")
 def AspectCritic(
-    dataset,
-    user_input_column="user_input",
-    response_column="response",
-    retrieved_contexts_column=None,
-    aspects:
+    dataset: VMDataset,
+    user_input_column: str = "user_input",
+    response_column: str = "response",
+    retrieved_contexts_column: Optional[str] = None,
+    aspects: List[str] = [
         "coherence",
         "conciseness",
         "correctness",
         "harmfulness",
         "maliciousness",
     ],
-    additional_aspects:
+    additional_aspects: Optional[List[Tuple[str, str]]] = None,
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, RawData]:
     """
     Evaluates generations against the following aspects: harmfulness, maliciousness,
     coherence, correctness, and conciseness.
@@ -146,8 +149,8 @@ def AspectCritic(
 
     if retrieved_contexts_column:
         required_columns["retrieved_contexts"] = retrieved_contexts_column
-
     df = get_renamed_columns(dataset._df, required_columns)
+    df = df[required_columns.keys()]
 
     custom_aspects = (
         [
```
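Besides the signature, `AspectCritic` picks up one behavioral change: after renaming, the dataframe is filtered down to exactly the required columns before it reaches the ragas evaluator. The effect is equivalent to this standalone pandas sketch (column names illustrative):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "user_input": ["What is PD?"],
        "response": ["Probability of default."],
        "extra_feature": [0.3],  # present in the dataset, unused by ragas
    }
)
required_columns = {"user_input": "user_input", "response": "response"}

# Selecting by the mapping's keys keeps only the renamed required columns,
# so stray columns never reach the evaluator.
df = df[required_columns.keys()]
print(df.columns.tolist())  # ['user_input', 'response']
```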
```diff
--- a/validmind/tests/model_validation/ragas/ContextEntityRecall.py
+++ b/validmind/tests/model_validation/ragas/ContextEntityRecall.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,12 +33,12 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization")
 def ContextEntityRecall(
-    dataset,
+    dataset: VMDataset,
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Evaluates the context entity recall for dataset entries and visualizes the results.
 
```
```diff
--- a/validmind/tests/model_validation/ragas/ContextPrecision.py
+++ b/validmind/tests/model_validation/ragas/ContextPrecision.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextPrecision(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context Precision is a metric that evaluates whether all of the ground-truth
     relevant items present in the contexts are ranked higher or not. Ideally all the
```
```diff
--- a/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py
+++ b/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py
@@ -3,12 +3,15 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import warnings
+from typing import Dict, Tuple
 
 import plotly.express as px
+import plotly.graph_objects as go
 from datasets import Dataset
 
 from validmind import RawData, tags, tasks
 from validmind.errors import MissingDependencyError
+from validmind.vm_models import VMDataset
 
 from .utils import get_ragas_config, get_renamed_columns
 
@@ -30,13 +33,13 @@ except ImportError as e:
 @tags("ragas", "llm", "retrieval_performance")
 @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
 def ContextPrecisionWithoutReference(
-    dataset,
+    dataset: VMDataset,
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     response_column: str = "response",
     judge_llm=None,
     judge_embeddings=None,
-):
+) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
     """
     Context Precision Without Reference is a metric used to evaluate the relevance of
     retrieved contexts compared to the expected response for a given user input. This
```
|