validmind 2.8.27__py3-none-any.whl → 2.8.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +1 -1
- validmind/models/function.py +11 -3
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +145 -38
- validmind/vm_models/result/result.py +14 -12
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/RECORD +207 -207
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
- {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
@@ -2,15 +2,20 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import List, Tuple
|
6
|
+
|
5
7
|
import plotly.graph_objects as go
|
6
8
|
from plotly.subplots import make_subplots
|
7
9
|
|
8
10
|
from validmind import tags, tasks
|
11
|
+
from validmind.vm_models import VMDataset, VMModel
|
9
12
|
|
10
13
|
|
11
14
|
@tags("visualization")
|
12
15
|
@tasks("monitoring")
|
13
|
-
def PredictionQuantilesAcrossFeatures(
|
16
|
+
def PredictionQuantilesAcrossFeatures(
|
17
|
+
datasets: List[VMDataset], model: VMModel
|
18
|
+
) -> Tuple[go.Figure, ...]:
|
14
19
|
"""
|
15
20
|
Assesses differences in model prediction distributions across individual features between reference
|
16
21
|
and monitoring datasets through quantile analysis.
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from typing import List
|
5
|
+
from typing import List, Tuple
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import plotly.graph_objects as go
|
@@ -20,7 +20,9 @@ from validmind.vm_models import VMDataset, VMModel
|
|
20
20
|
"visualization",
|
21
21
|
)
|
22
22
|
@tasks("classification", "text_classification")
|
23
|
-
def ROCCurveDrift(
|
23
|
+
def ROCCurveDrift(
|
24
|
+
datasets: List[VMDataset], model: VMModel
|
25
|
+
) -> Tuple[go.Figure, go.Figure, RawData]:
|
24
26
|
"""
|
25
27
|
Compares ROC curves between reference and monitoring datasets.
|
26
28
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from typing import List
|
5
|
+
from typing import Dict, List, Tuple
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import pandas as pd
|
@@ -19,7 +19,7 @@ def ScoreBandsDrift(
|
|
19
19
|
score_column: str = "score",
|
20
20
|
score_bands: list = None,
|
21
21
|
drift_threshold: float = 20.0,
|
22
|
-
):
|
22
|
+
) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
|
23
23
|
"""
|
24
24
|
Analyzes drift in population distribution and default rates across score bands.
|
25
25
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from typing import List
|
5
|
+
from typing import Dict, List, Tuple
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import pandas as pd
|
@@ -21,7 +21,7 @@ def ScorecardHistogramDrift(
|
|
21
21
|
score_column: str = "score",
|
22
22
|
title: str = "Scorecard Histogram Drift",
|
23
23
|
drift_pct_threshold: float = 20.0,
|
24
|
-
):
|
24
|
+
) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool]:
|
25
25
|
"""
|
26
26
|
Compares score distributions between reference and monitoring datasets for each class.
|
27
27
|
|
@@ -2,17 +2,24 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Dict, List, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
import plotly.figure_factory as ff
|
7
9
|
import plotly.graph_objects as go
|
8
10
|
from scipy.stats import kurtosis, skew
|
9
11
|
|
10
12
|
from validmind import RawData, tags, tasks
|
13
|
+
from validmind.vm_models import VMDataset, VMModel
|
11
14
|
|
12
15
|
|
13
16
|
@tags("visualization")
|
14
17
|
@tasks("monitoring")
|
15
|
-
def TargetPredictionDistributionPlot(
|
18
|
+
def TargetPredictionDistributionPlot(
|
19
|
+
datasets: List[VMDataset],
|
20
|
+
model: VMModel,
|
21
|
+
drift_pct_threshold: float = 20,
|
22
|
+
) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
|
16
23
|
"""
|
17
24
|
Assesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify
|
18
25
|
potential data drift.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -45,7 +47,9 @@ Prompt:
|
|
45
47
|
|
46
48
|
@tags("llm", "few_shot")
|
47
49
|
@tasks("text_classification", "text_summarization")
|
48
|
-
def Bias(
|
50
|
+
def Bias(
|
51
|
+
model, min_threshold=7, judge_llm=None
|
52
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
49
53
|
"""
|
50
54
|
Assesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the
|
51
55
|
prompt.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -46,7 +48,9 @@ Prompt:
|
|
46
48
|
|
47
49
|
@tags("llm", "zero_shot", "few_shot")
|
48
50
|
@tasks("text_classification", "text_summarization")
|
49
|
-
def Clarity(
|
51
|
+
def Clarity(
|
52
|
+
model, min_threshold=7, judge_llm=None
|
53
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
50
54
|
"""
|
51
55
|
Evaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines.
|
52
56
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -54,7 +56,9 @@ Prompt:
|
|
54
56
|
|
55
57
|
@tags("llm", "zero_shot", "few_shot")
|
56
58
|
@tasks("text_classification", "text_summarization")
|
57
|
-
def Conciseness(
|
59
|
+
def Conciseness(
|
60
|
+
model, min_threshold=7, judge_llm=None
|
61
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
58
62
|
"""
|
59
63
|
Analyzes and grades the conciseness of prompts provided to a Large Language Model.
|
60
64
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -39,7 +41,9 @@ Prompt:
|
|
39
41
|
|
40
42
|
@tags("llm", "zero_shot", "few_shot")
|
41
43
|
@tasks("text_classification", "text_summarization")
|
42
|
-
def Delimitation(
|
44
|
+
def Delimitation(
|
45
|
+
model, min_threshold=7, judge_llm=None
|
46
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
43
47
|
"""
|
44
48
|
Evaluates the proper use of delimiters in prompts provided to Large Language Models.
|
45
49
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -52,7 +54,9 @@ Prompt:
|
|
52
54
|
|
53
55
|
@tags("llm", "zero_shot", "few_shot")
|
54
56
|
@tasks("text_classification", "text_summarization")
|
55
|
-
def NegativeInstruction(
|
57
|
+
def NegativeInstruction(
|
58
|
+
model, min_threshold=7, judge_llm=None
|
59
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
56
60
|
"""
|
57
61
|
Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
|
58
62
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
import pandas as pd
|
6
8
|
|
7
9
|
from validmind import RawData, tags, tasks
|
@@ -56,7 +58,9 @@ Input:
|
|
56
58
|
|
57
59
|
@tags("llm", "zero_shot", "few_shot")
|
58
60
|
@tasks("text_classification", "text_summarization")
|
59
|
-
def Robustness(
|
61
|
+
def Robustness(
|
62
|
+
model, dataset, num_tests=10, judge_llm=None
|
63
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
60
64
|
"""
|
61
65
|
Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test
|
62
66
|
specifically measures the model's ability to generate correct classifications with the given prompt even when the
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -52,7 +54,9 @@ Prompt:
|
|
52
54
|
|
53
55
|
@tags("llm", "zero_shot", "few_shot")
|
54
56
|
@tasks("text_classification", "text_summarization")
|
55
|
-
def Specificity(
|
57
|
+
def Specificity(
|
58
|
+
model, min_threshold=7, judge_llm=None
|
59
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
56
60
|
"""
|
57
61
|
Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail,
|
58
62
|
and relevance.
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import accuracy_score
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tasks("classification")
|
11
12
|
@tags("classification")
|
12
|
-
def Accuracy(dataset, model):
|
13
|
+
def Accuracy(dataset: VMDataset, model: VMModel) -> float:
|
13
14
|
"""Calculates the accuracy of a model"""
|
14
15
|
return accuracy_score(dataset.y, dataset.y_pred(model))
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import f1_score
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tasks("classification")
|
11
12
|
@tags("classification")
|
12
|
-
def F1(model, dataset, **kwargs):
|
13
|
+
def F1(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
13
14
|
"""Calculates the F1 score for a classification model."""
|
14
15
|
return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import precision_score
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tasks("classification")
|
11
12
|
@tags("classification")
|
12
|
-
def Precision(model, dataset, **kwargs):
|
13
|
+
def Precision(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
13
14
|
"""Calculates the precision for a classification model."""
|
14
15
|
return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -7,11 +7,12 @@ from sklearn.metrics import roc_auc_score
|
|
7
7
|
from sklearn.preprocessing import LabelBinarizer
|
8
8
|
|
9
9
|
from validmind import tags, tasks
|
10
|
+
from validmind.vm_models import VMDataset, VMModel
|
10
11
|
|
11
12
|
|
12
13
|
@tasks("classification")
|
13
14
|
@tags("classification")
|
14
|
-
def ROC_AUC(model, dataset, **kwargs):
|
15
|
+
def ROC_AUC(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
15
16
|
"""Calculates the ROC AUC for a classification model."""
|
16
17
|
y_true = dataset.y
|
17
18
|
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import recall_score
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tasks("classification")
|
11
12
|
@tags("classification")
|
12
|
-
def Recall(model, dataset, **kwargs):
|
13
|
+
def Recall(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
13
14
|
"""Calculates the recall for a classification model."""
|
14
15
|
return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -5,11 +5,12 @@
|
|
5
5
|
from sklearn.metrics import r2_score as _r2_score
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def AdjustedRSquaredScore(model, dataset):
|
13
|
+
def AdjustedRSquaredScore(model: VMModel, dataset: VMDataset) -> float:
|
13
14
|
"""Calculates the adjusted R-squared score for a regression model."""
|
14
15
|
r2_score = _r2_score(
|
15
16
|
dataset.y,
|
@@ -5,11 +5,12 @@
|
|
5
5
|
import numpy as np
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def GiniCoefficient(dataset, model):
|
13
|
+
def GiniCoefficient(dataset: VMDataset, model: VMModel) -> float:
|
13
14
|
"""Calculates the Gini coefficient for a regression model."""
|
14
15
|
y_true = dataset.y
|
15
16
|
y_pred = dataset.y_pred(model)
|
@@ -5,11 +5,12 @@
|
|
5
5
|
import numpy as np
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def HuberLoss(model, dataset):
|
13
|
+
def HuberLoss(model: VMModel, dataset: VMDataset) -> float:
|
13
14
|
"""Calculates the Huber loss for a regression model."""
|
14
15
|
y_true = dataset.y
|
15
16
|
y_pred = dataset.y_pred(model)
|
@@ -5,11 +5,12 @@
|
|
5
5
|
import numpy as np
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def KolmogorovSmirnovStatistic(dataset, model):
|
13
|
+
def KolmogorovSmirnovStatistic(dataset: VMDataset, model: VMModel) -> float:
|
13
14
|
"""Calculates the Kolmogorov-Smirnov statistic for a regression model."""
|
14
15
|
y_true = dataset.y.flatten()
|
15
16
|
y_pred = dataset.y_pred(model)
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import mean_absolute_error as _mean_absolute_error
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def MeanAbsoluteError(model, dataset, **kwargs):
|
13
|
+
def MeanAbsoluteError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
13
14
|
"""Calculates the mean absolute error for a regression model."""
|
14
15
|
return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -5,11 +5,12 @@
|
|
5
5
|
import numpy as np
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def MeanAbsolutePercentageError(model, dataset):
|
13
|
+
def MeanAbsolutePercentageError(model: VMModel, dataset: VMDataset) -> float:
|
13
14
|
"""Calculates the mean absolute percentage error for a regression model."""
|
14
15
|
y_true = dataset.y
|
15
16
|
y_pred = dataset.y_pred(model)
|
@@ -5,10 +5,11 @@
|
|
5
5
|
import numpy as np
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def MeanBiasDeviation(model, dataset):
|
13
|
+
def MeanBiasDeviation(model: VMModel, dataset: VMDataset) -> float:
|
13
14
|
"""Calculates the mean bias deviation for a regression model."""
|
14
15
|
return np.mean(dataset.y - dataset.y_pred(model))
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import mean_squared_error
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def MeanSquaredError(model, dataset, **kwargs):
|
13
|
+
def MeanSquaredError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
13
14
|
"""Calculates the mean squared error for a regression model."""
|
14
15
|
return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -9,7 +9,7 @@ from validmind import tags, tasks
|
|
9
9
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
|
-
def QuantileLoss(model, dataset, quantile=0.5):
|
12
|
+
def QuantileLoss(model, dataset, quantile=0.5) -> float:
|
13
13
|
"""Calculates the quantile loss for a regression model."""
|
14
14
|
error = dataset.y - dataset.y_pred(model)
|
15
15
|
|
@@ -5,10 +5,11 @@
|
|
5
5
|
from sklearn.metrics import r2_score
|
6
6
|
|
7
7
|
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
8
9
|
|
9
10
|
|
10
11
|
@tags("regression")
|
11
12
|
@tasks("regression")
|
12
|
-
def RSquaredScore(model, dataset):
|
13
|
+
def RSquaredScore(model: VMModel, dataset: VMDataset) -> float:
|
13
14
|
"""Calculates the R-squared score for a regression model."""
|
14
15
|
return r2_score(dataset.y, dataset.y_pred(model))
|
@@ -6,11 +6,12 @@ import numpy as np
|
|
6
6
|
from sklearn.metrics import mean_squared_error
|
7
7
|
|
8
8
|
from validmind import tags, tasks
|
9
|
+
from validmind.vm_models import VMDataset, VMModel
|
9
10
|
|
10
11
|
|
11
12
|
@tags("regression")
|
12
13
|
@tasks("regression")
|
13
|
-
def RootMeanSquaredError(model, dataset, **kwargs):
|
14
|
+
def RootMeanSquaredError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
|
14
15
|
"""Calculates the root mean squared error for a regression model."""
|
15
16
|
return np.sqrt(
|
16
17
|
mean_squared_error(
|