validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +4 -24
- validmind/api_client.py +6 -17
- validmind/logging.py +48 -0
- validmind/models/function.py +11 -3
- validmind/tests/__init__.py +2 -0
- validmind/tests/__types__.py +18 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
- validmind/tests/data_validation/ADF.py +3 -1
- validmind/tests/data_validation/AutoAR.py +3 -1
- validmind/tests/data_validation/AutoMA.py +5 -1
- validmind/tests/data_validation/AutoStationarity.py +5 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
- validmind/tests/data_validation/BoxPierce.py +4 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +4 -1
- validmind/tests/data_validation/DatasetSplit.py +3 -2
- validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
- validmind/tests/data_validation/Duplicates.py +3 -1
- validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/HighCardinality.py +3 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
- validmind/tests/data_validation/IQROutliersTable.py +6 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
- validmind/tests/data_validation/JarqueBera.py +3 -1
- validmind/tests/data_validation/KPSS.py +3 -1
- validmind/tests/data_validation/LJungBox.py +3 -1
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
- validmind/tests/data_validation/MissingValues.py +5 -1
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +4 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
- validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
- validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
- validmind/tests/data_validation/RollingStatsPlot.py +5 -1
- validmind/tests/data_validation/RunsTest.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
- validmind/tests/data_validation/SeasonalDecompose.py +6 -1
- validmind/tests/data_validation/ShapiroWilk.py +4 -1
- validmind/tests/data_validation/Skewness.py +3 -1
- validmind/tests/data_validation/SpreadPlot.py +3 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
- validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
- validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
- validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
- validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
- validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
- validmind/tests/data_validation/TooManyZeroValues.py +6 -1
- validmind/tests/data_validation/UniqueRows.py +5 -1
- validmind/tests/data_validation/WOEBinPlots.py +4 -1
- validmind/tests/data_validation/WOEBinTable.py +5 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
- validmind/tests/data_validation/nlp/CommonWords.py +2 -1
- validmind/tests/data_validation/nlp/Hashtags.py +2 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
- validmind/tests/data_validation/nlp/Mentions.py +3 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
- validmind/tests/data_validation/nlp/Punctuations.py +2 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/StopWords.py +2 -1
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/data_validation/nlp/Toxicity.py +3 -1
- validmind/tests/load.py +91 -17
- validmind/tests/model_validation/BertScore.py +6 -3
- validmind/tests/model_validation/BleuScore.py +6 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
- validmind/tests/model_validation/ContextualRecall.py +6 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -1
- validmind/tests/model_validation/MeteorScore.py +6 -1
- validmind/tests/model_validation/ModelMetadata.py +2 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
- validmind/tests/model_validation/RegardScore.py +7 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
- validmind/tests/model_validation/RougeScore.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
- validmind/tests/model_validation/TokenDisparity.py +6 -1
- validmind/tests/model_validation/ToxicityScore.py +6 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
- validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
- validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
- validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
- validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
- validmind/tests/output.py +9 -2
- validmind/tests/plots/BoxPlot.py +260 -0
- validmind/tests/plots/CorrelationHeatmap.py +235 -0
- validmind/tests/plots/HistogramPlot.py +233 -0
- validmind/tests/plots/ViolinPlot.py +125 -0
- validmind/tests/plots/__init__.py +0 -0
- validmind/tests/prompt_validation/Bias.py +5 -1
- validmind/tests/prompt_validation/Clarity.py +5 -1
- validmind/tests/prompt_validation/Conciseness.py +5 -1
- validmind/tests/prompt_validation/Delimitation.py +5 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
- validmind/tests/prompt_validation/Robustness.py +5 -1
- validmind/tests/prompt_validation/Specificity.py +5 -1
- validmind/tests/stats/CorrelationAnalysis.py +251 -0
- validmind/tests/stats/DescriptiveStats.py +197 -0
- validmind/tests/stats/NormalityTests.py +147 -0
- validmind/tests/stats/OutlierDetection.py +173 -0
- validmind/tests/stats/__init__.py +0 -0
- validmind/unit_metrics/classification/Accuracy.py +2 -1
- validmind/unit_metrics/classification/F1.py +2 -1
- validmind/unit_metrics/classification/Precision.py +2 -1
- validmind/unit_metrics/classification/ROC_AUC.py +2 -1
- validmind/unit_metrics/classification/Recall.py +2 -1
- validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
- validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
- validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
- validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
- validmind/unit_metrics/classification/individual/Confidence.py +52 -0
- validmind/unit_metrics/classification/individual/Correctness.py +41 -0
- validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
- validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
- validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
- validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
- validmind/unit_metrics/classification/individual/__init__.py +0 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
- validmind/unit_metrics/regression/HuberLoss.py +2 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
- validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
- validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
- validmind/vm_models/dataset/dataset.py +291 -38
- validmind/vm_models/result/result.py +26 -4
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/RECORD +233 -212
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
- {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,235 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import plotly.graph_objects as go
|
9
|
+
|
10
|
+
from validmind import tags, tasks
|
11
|
+
from validmind.errors import SkipTestError
|
12
|
+
from validmind.vm_models import VMDataset
|
13
|
+
|
14
|
+
|
15
|
+
def _validate_and_prepare_data(
    dataset: VMDataset, columns: Optional[List[str]], method: str
):
    """Pick the numeric columns to analyse and return their correlation matrix.

    Args:
        dataset: Object exposing `feature_columns_numeric` and `df`.
        columns: Requested column names, or None to use every numeric
            feature column. Requested names that are not numeric feature
            columns are dropped silently.
        method: Correlation method passed to the `corr` call on the data.

    Returns:
        The correlation matrix of the selected columns whose variance is
        not zero.

    Raises:
        SkipTestError: If no columns are selected, fewer than two columns
            are selected, or fewer than two columns remain once the
            zero-variance columns are removed.
    """
    numeric_features = dataset.feature_columns_numeric
    if columns is not None:
        allowed = set(numeric_features)
        selected = [name for name in columns if name in allowed]
    else:
        selected = numeric_features

    if not selected:
        raise SkipTestError("No numerical columns found for correlation analysis")

    if len(selected) < 2:
        raise SkipTestError(
            "At least 2 numerical columns required for correlation analysis"
        )

    # Zero-variance columns are removed before correlating
    frame = dataset.df[selected]
    has_variance = frame.var() != 0
    frame = frame.loc[:, has_variance]

    if frame.shape[1] < 2:
        raise SkipTestError(
            "Insufficient non-constant columns for correlation analysis"
        )

    return frame.corr(method=method)
|
43
|
+
|
44
|
+
|
45
|
+
def _apply_filters(corr_matrix, threshold: Optional[float], mask_upper: bool):
    """Blank out correlation cells according to the requested filters.

    Args:
        corr_matrix: Correlation matrix supporting `where` and `shape`.
        threshold: When not None, cells whose absolute value is below this
            number are replaced with NaN.
        mask_upper: When true, the upper triangle including the diagonal is
            replaced with NaN.

    Returns:
        The filtered matrix; the input itself when no filter is requested.
    """
    filtered = corr_matrix

    if threshold is not None:
        too_weak = np.abs(filtered) < threshold
        filtered = filtered.where(~too_weak)

    if mask_upper:
        # Strict lower triangle is the complement of "upper incl. diagonal"
        keep_lower = np.tril(np.ones(filtered.shape, dtype=bool), k=-1)
        filtered = filtered.where(keep_lower)

    return filtered
|
56
|
+
|
57
|
+
|
58
|
+
def _create_annotation_text(z_values, y_labels, x_labels, show_values: bool):
    """Build the grid of cell labels drawn on top of the heatmap.

    Args:
        z_values: 2-D indexable of cell values (`z_values[row][col]`).
        y_labels: Row labels; only their count is used.
        x_labels: Column labels; only their count is used.
        show_values: When false, no labels are produced.

    Returns:
        None when `show_values` is false. Otherwise a list of rows, each a
        list of strings: "" for NaN cells, the value formatted to three
        decimals for every other cell.
    """
    if not show_values:
        return None

    def _label(cell):
        return "" if np.isnan(cell) else f"{cell:.3f}"

    row_count, col_count = len(y_labels), len(x_labels)
    return [
        [_label(z_values[r][c]) for c in range(col_count)]
        for r in range(row_count)
    ]
|
74
|
+
|
75
|
+
|
76
|
+
def _calculate_adaptive_font_size(n_features: int) -> int:
    """Return the cell-label font size for a heatmap with `n_features` features.

    Sizes shrink as the feature count grows: 12 up to 10 features, 10 up to
    20, 8 up to 30, and 6 beyond that.
    """
    # (inclusive upper bound on feature count, font size)
    size_steps = ((10, 12), (20, 10), (30, 8))
    for upper_bound, font_size in size_steps:
        if n_features <= upper_bound:
            return font_size
    return 6
|
86
|
+
|
87
|
+
|
88
|
+
def _calculate_stats_and_update_layout(
    fig, corr_matrix, method: str, title: str, width: int, height: int
):
    """Summarise the correlations and apply title, size and stats box to `fig`.

    The stats box lists the feature count and, when at least one pairwise
    value is available, the mean and maximum absolute correlation.

    Args:
        fig: Figure-like object; only its `update_layout` method is called.
        corr_matrix: Correlation matrix exposing `values` and `shape`. Cells
            may already be NaN from threshold or triangle masking.
        method: Correlation method name, capitalised into the title.
        title: Title prefix.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
    """
    values = corr_matrix.values
    n_features = corr_matrix.shape[0]

    # Summarise every off-diagonal cell rather than only the strict upper
    # triangle: when the caller has masked the upper triangle that region is
    # entirely NaN and the statistics would vanish. For an unmasked
    # (symmetric) matrix both triangles hold the same values, so the mean and
    # maximum are unchanged.
    off_diagonal = values[~np.eye(*values.shape, dtype=bool)]
    pair_corrs = np.abs(off_diagonal[~np.isnan(off_diagonal)])

    if pair_corrs.size > 0:
        mean_corr = pair_corrs.mean()
        max_corr = pair_corrs.max()
        stats_text = f"Features: {n_features}<br>Mean |r|: {mean_corr:.3f}<br>Max |r|: {max_corr:.3f}"
    else:
        stats_text = f"Features: {n_features}"

    fig.update_layout(
        title={
            "text": f"{title} ({method.capitalize()} Correlation)",
            "x": 0.5,
            "xanchor": "center",
        },
        width=width,
        height=height,
        template="plotly_white",
        xaxis=dict(tickangle=45, side="bottom"),
        yaxis=dict(tickmode="linear", autorange="reversed"),
        annotations=[
            dict(
                text=stats_text,
                x=0.02,
                y=0.98,
                xref="paper",
                yref="paper",
                showarrow=False,
                align="left",
                bgcolor="rgba(255,255,255,0.8)",
                bordercolor="black",
                borderwidth=1,
            )
        ],
    )
|
129
|
+
|
130
|
+
|
131
|
+
@tags("tabular_data", "visualization", "correlation")
@tasks("classification", "regression", "clustering")
def CorrelationHeatmap(
    dataset: VMDataset,
    columns: Optional[List[str]] = None,
    method: str = "pearson",
    show_values: bool = True,
    colorscale: str = "RdBu",
    width: int = 800,
    height: int = 600,
    mask_upper: bool = False,
    threshold: Optional[float] = None,
    title: str = "Correlation Heatmap",
) -> go.Figure:
    """
    Generates customizable correlation heatmap plots for numerical features in a dataset using Plotly.

    ### Purpose

    This test provides a flexible way to visualize correlations between numerical features
    in a dataset using interactive Plotly heatmaps. It supports different correlation methods
    and extensive customization options for the heatmap appearance, making it suitable for
    exploring feature relationships in data analysis.

    ### Test Mechanism

    The test computes correlation coefficients between specified numerical columns
    (or all numerical columns if none specified) using the specified method.
    It then creates an interactive heatmap visualization with customizable appearance options including:
    - Different correlation methods (pearson, spearman, kendall)
    - Color schemes and annotations
    - Masking options for upper triangle
    - Threshold filtering for significant correlations
    - Interactive hover information

    ### Signs of High Risk

    - Very high correlations (>0.9) between features indicating multicollinearity
    - Unexpected correlation patterns that contradict domain knowledge
    - Features with no correlation to any other variables
    - Strong correlations with the target variable that might indicate data leakage

    ### Strengths

    - Supports multiple correlation methods
    - Interactive Plotly plots with hover information and zoom capabilities
    - Highly customizable visualization options
    - Can handle missing values appropriately
    - Provides clear visual representation of feature relationships
    - Optional thresholding to focus on significant correlations

    ### Limitations

    - Limited to numerical features only
    - Cannot capture non-linear relationships effectively
    - May be difficult to interpret with many features
    - Correlation does not imply causation
    """
    # Correlation matrix of the usable columns, then threshold / triangle masks
    matrix = _apply_filters(
        _validate_and_prepare_data(dataset, columns, method), threshold, mask_upper
    )

    cell_values = matrix.values
    col_labels = matrix.columns.tolist()
    row_labels = matrix.index.tolist()
    cell_text = _create_annotation_text(
        cell_values, row_labels, col_labels, show_values
    )

    # Shared by the colorbar title and the hover label
    method_label = f"{method.capitalize()} Correlation"

    trace_options = dict(
        z=cell_values,
        x=col_labels,
        y=row_labels,
        colorscale=colorscale,
        zmin=-1,
        zmax=1,
        colorbar=dict(title=method_label),
        hoverongaps=False,
        hovertemplate=(
            "<b>%{y}</b> vs <b>%{x}</b><br>"
            + f"{method_label}: %{{z:.3f}}<br>"
            + "<extra></extra>"
        ),
    )

    # Cell labels are optional; their font shrinks as the feature count grows
    if cell_text is not None and show_values:
        trace_options["text"] = cell_text
        trace_options["texttemplate"] = "%{text}"
        trace_options["textfont"] = {
            "size": _calculate_adaptive_font_size(len(col_labels)),
            "color": "black",
        }

    fig = go.Figure(data=go.Heatmap(**trace_options))

    # Title, size and the summary-statistics box
    _calculate_stats_and_update_layout(fig, matrix, method, title, width, height)

    return fig
|
@@ -0,0 +1,233 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from typing import List, Optional, Union
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import plotly.graph_objects as go
|
9
|
+
from plotly.subplots import make_subplots
|
10
|
+
from scipy import stats
|
11
|
+
|
12
|
+
from validmind import tags, tasks
|
13
|
+
from validmind.errors import SkipTestError
|
14
|
+
from validmind.vm_models import VMDataset
|
15
|
+
|
16
|
+
|
17
|
+
def _validate_columns(dataset: VMDataset, columns: Optional[List[str]]):
    """Resolve the numeric columns to plot.

    Args:
        dataset: Object exposing `feature_columns_numeric`.
        columns: Requested column names, or None to use every numeric
            feature column. Requested names that are not numeric feature
            columns are dropped silently.

    Returns:
        The resolved, non-empty collection of column names.

    Raises:
        SkipTestError: If no column remains.
    """
    numeric_features = dataset.feature_columns_numeric
    if columns is not None:
        allowed = set(numeric_features)
        selected = [name for name in columns if name in allowed]
    else:
        selected = numeric_features

    if selected:
        return selected

    raise SkipTestError("No numerical columns found for histogram plotting")
|
29
|
+
|
30
|
+
|
31
|
+
def _process_column_data(data, log_scale: bool, column: str):
    """Return the values to plot and the matching x-axis label.

    The base-10 logarithm is applied only when `log_scale` is set and every
    value is strictly positive; otherwise the data and the plain column name
    are returned unchanged.

    Args:
        data: Column values supporting `> 0` comparison and `.all()`.
        log_scale: Whether a logarithmic transform was requested.
        column: Column name used for the axis label.

    Returns:
        A `(plot_data, xlabel)` tuple.
    """
    apply_log = log_scale and (data > 0).all()
    if not apply_log:
        return data, column
    return np.log10(data), f"log10({column})"
|
39
|
+
|
40
|
+
|
41
|
+
def _add_histogram_trace(
    fig, plot_data, bins, color, opacity, normalize, column, row, col
):
    """Add one histogram trace for `column` to the subplot at (`row`, `col`).

    `bins` is used as the bin count only when it is an int; any other value
    leaves the bin count unset. When `normalize` is true the bars use the
    "probability density" normalisation.
    """
    bin_count = bins if isinstance(bins, int) else None
    norm_mode = "probability density" if normalize else None

    histogram = go.Histogram(
        x=plot_data,
        nbinsx=bin_count,
        name=f"Histogram - {column}",
        marker_color=color,
        opacity=opacity,
        histnorm=norm_mode,
        showlegend=False,
    )
    fig.add_trace(histogram, row=row, col=col)
|
60
|
+
|
61
|
+
|
62
|
+
def _add_kde_trace(fig, plot_data, bins, normalize, column, row, col):
    """Overlay a Gaussian KDE curve on the subplot at (`row`, `col`).

    The overlay is best-effort: when a density cannot be estimated from
    `plot_data` (for example too few points or zero variance) no trace is
    added and the function returns quietly.

    Args:
        fig: Figure receiving the trace via `add_trace`.
        plot_data: Values to estimate the density from; must support
            `min()`, `max()` and `len()`.
        bins: Histogram bin setting. A positive int is used to scale the
            curve to count units; anything else falls back to 30.
        normalize: When true the raw density is plotted; otherwise the curve
            is rescaled so that its peak equals `len(plot_data) / bins`.
        column: Column name used in the trace name.
        row: Subplot row.
        col: Subplot column.
    """
    # Only the density estimation is allowed to fail silently; errors while
    # building or adding the trace are real bugs and must surface.
    try:
        kde = stats.gaussian_kde(plot_data)
        x_range = np.linspace(plot_data.min(), plot_data.max(), 100)
        kde_values = kde(x_range)
    except (ValueError, TypeError, np.linalg.LinAlgError):
        return

    if not normalize:
        # A non-int or non-positive bin setting cannot be used as a divisor
        n_bins = bins if isinstance(bins, int) and bins > 0 else 30
        peak = kde_values.max()
        if not np.isfinite(peak) or peak <= 0:
            # Nothing sensible to rescale against
            return
        hist_max = len(plot_data) / n_bins
        kde_values = kde_values * hist_max / peak

    fig.add_trace(
        go.Scatter(
            x=x_range,
            y=kde_values,
            mode="lines",
            name=f"KDE - {column}",
            line=dict(color="red", width=2),
            showlegend=False,
        ),
        row=row,
        col=col,
    )
|
89
|
+
|
90
|
+
|
91
|
+
def _add_stats_annotation(fig, data, idx, row, col):
    """Place a mean / std / count box in the top-left corner of a subplot.

    Args:
        fig: Figure receiving the annotation via `add_annotation`.
        data: Column values supporting `mean()`, `std()` and `len()`.
        idx: Zero-based subplot index; selects the axis pair the annotation
            is anchored to (`x`/`y` for 0, `x{idx+1}`/`y{idx+1}` otherwise).
        row: Subplot row.
        col: Subplot column.
    """
    summary = f"Mean: {data.mean():.3f}<br>Std: {data.std():.3f}<br>N: {len(data)}"

    # The first subplot's axes carry no numeric suffix
    axis_suffix = str(idx + 1) if idx > 0 else ""

    fig.add_annotation(
        text=summary,
        x=0.02,
        y=0.98,
        xref=f"x{axis_suffix} domain",
        yref=f"y{axis_suffix} domain",
        showarrow=False,
        align="left",
        bgcolor="rgba(255,255,255,0.8)",
        bordercolor="black",
        borderwidth=1,
        row=row,
        col=col,
    )
|
108
|
+
|
109
|
+
|
110
|
+
@tags("tabular_data", "visualization", "data_quality")
@tasks("classification", "regression", "clustering")
def HistogramPlot(
    dataset: VMDataset,
    columns: Optional[List[str]] = None,
    bins: Union[int, str, List] = 30,
    color: str = "steelblue",
    opacity: float = 0.7,
    show_kde: bool = True,
    normalize: bool = False,
    log_scale: bool = False,
    title_prefix: str = "Histogram of",
    width: int = 1200,
    height: int = 800,
    n_cols: int = 2,
    vertical_spacing: float = 0.15,
    horizontal_spacing: float = 0.1,
) -> go.Figure:
    """
    Generates customizable histogram plots for numerical features in a dataset using Plotly.

    ### Purpose

    This test provides a flexible way to visualize the distribution of numerical features in a dataset.
    It allows for extensive customization of the histogram appearance and behavior through parameters,
    making it suitable for various exploratory data analysis tasks.

    ### Test Mechanism

    The test creates histogram plots for specified numerical columns (or all numerical columns if none specified).
    It supports various customization options including:
    - Number of bins or bin edges
    - Color and opacity
    - Kernel density estimation overlay
    - Logarithmic scaling
    - Normalization options
    - Configurable subplot layout (columns and spacing)

    ### Signs of High Risk

    - Highly skewed distributions that may indicate data quality issues
    - Unexpected bimodal or multimodal distributions
    - Presence of extreme outliers
    - Empty or sparse distributions

    ### Strengths

    - Highly customizable visualization options
    - Interactive Plotly plots with zoom, pan, and hover capabilities
    - Supports both single and multiple column analysis
    - Provides insights into data distribution patterns
    - Can handle different data types and scales
    - Configurable subplot layout for better visualization

    ### Limitations

    - Limited to numerical features only
    - Visual interpretation may be subjective
    - May not be suitable for high-dimensional datasets
    - Performance may degrade with very large datasets
    """
    # Validate inputs
    columns = _validate_columns(dataset, columns)

    # Calculate subplot layout. The lower bound of 1 keeps a zero or negative
    # n_cols from causing a division by zero when the row count is derived.
    n_cols = max(1, min(n_cols, len(columns)))
    n_rows = (len(columns) + n_cols - 1) // n_cols

    # Create subplots
    subplot_titles = [f"{title_prefix} {col}" for col in columns]
    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=subplot_titles,
        vertical_spacing=vertical_spacing,
        horizontal_spacing=horizontal_spacing,
    )

    for idx, column in enumerate(columns):
        row = (idx // n_cols) + 1
        col = (idx % n_cols) + 1
        data = dataset.df[column].dropna()

        if len(data) == 0:
            # Anchor to the subplot's axis *domain* so that (0.5, 0.5) is the
            # centre of the empty subplot rather than a data coordinate on an
            # axis that has no data; this matches the stats annotation.
            fig.add_annotation(
                text=f"No data available<br>for {column}",
                x=0.5,
                y=0.5,
                xref=f"x{idx+1} domain" if idx > 0 else "x domain",
                yref=f"y{idx+1} domain" if idx > 0 else "y domain",
                showarrow=False,
                row=row,
                col=col,
            )
            continue

        # Process data
        plot_data, xlabel = _process_column_data(data, log_scale, column)

        # Add histogram
        _add_histogram_trace(
            fig, plot_data, bins, color, opacity, normalize, column, row, col
        )

        # Add KDE if requested
        if show_kde and len(data) > 1:
            _add_kde_trace(fig, plot_data, bins, normalize, column, row, col)

        # Update axes and add annotations
        fig.update_xaxes(title_text=xlabel, row=row, col=col)
        ylabel = "Density" if normalize else "Frequency"
        fig.update_yaxes(title_text=ylabel, row=row, col=col)
        # Stats describe the original values, not the log-transformed ones
        _add_stats_annotation(fig, data, idx, row, col)

    # Update layout
    fig.update_layout(
        title_text="Dataset Feature Distributions",
        showlegend=False,
        width=width,
        height=height,
        template="plotly_white",
    )

    return fig
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from typing import List, Optional
|
6
|
+
|
7
|
+
import plotly.express as px
|
8
|
+
|
9
|
+
from validmind import tags, tasks
|
10
|
+
from validmind.errors import SkipTestError
|
11
|
+
from validmind.vm_models import VMDataset
|
12
|
+
|
13
|
+
|
14
|
+
@tags("tabular_data", "visualization", "distribution")
|
15
|
+
@tasks("classification", "regression", "clustering")
|
16
|
+
def ViolinPlot(
    dataset: VMDataset,
    columns: Optional[List[str]] = None,
    group_by: Optional[str] = None,
    width: int = 800,
    height: int = 600,
) -> px.violin:
    """
    Generates interactive violin plots for numerical features using Plotly.

    ### Purpose

    This test creates violin plots to visualize the distribution of numerical features,
    showing both the probability density and summary statistics. Violin plots combine
    aspects of box plots and kernel density estimation for rich distribution visualization.

    ### Test Mechanism

    The test creates violin plots for specified numerical columns, with optional
    grouping by categorical variables. Each violin shows the distribution shape,
    quartiles, and median values. Rows with a missing value in any plotted column
    (or in the grouping column, when grouping is used) are excluded before plotting.

    ### Signs of High Risk

    - Multimodal distributions that might indicate mixed populations
    - Highly skewed distributions suggesting data quality issues
    - Large differences in distribution shapes across groups
    - Unusual distribution patterns that contradict domain expectations

    ### Strengths

    - Shows detailed distribution shape information
    - Interactive Plotly visualization with hover details
    - Effective for comparing distributions across groups
    - Combines density estimation with quartile information

    ### Limitations

    - Limited to numerical features only
    - Requires sufficient data points for meaningful density estimation
    - May not be suitable for discrete variables
    - Can be misleading with very small sample sizes
    """
    # Parameters:
    #   dataset  - dataset whose `df` and `feature_columns_numeric` are read.
    #   columns  - columns to plot; entries that are not numeric feature
    #              columns are ignored. None selects every numeric feature.
    #   group_by - optional column of `dataset.df` used to colour the violins;
    #              ignored when it is empty or not present in the dataframe.
    #   width, height - figure size forwarded to Plotly Express.
    # Returns the figure produced by `px.violin`.
    # Raises SkipTestError when no numeric column or no complete row remains.
    # NOTE(review): the `-> px.violin` annotation names the factory function
    # rather than the returned figure type; kept unchanged for compatibility.
    frame = dataset.df

    # Get numerical columns
    if columns is None:
        columns = dataset.feature_columns_numeric
    else:
        available_columns = set(dataset.feature_columns_numeric)
        columns = [col for col in columns if col in available_columns]

    if not columns:
        raise SkipTestError("No numerical columns found for violin plot")

    use_group = bool(group_by) and group_by in frame.columns

    # Build ONE positional row mask and use it for both the values and the
    # group labels. Deriving the labels from a separate per-column dropna()
    # (as was done previously) selects a different set of rows, which either
    # silently disabled grouping or attached labels to the wrong rows.
    keep = frame[columns].notna().all(axis=1).to_numpy()
    if use_group:
        keep = keep & frame[group_by].notna().to_numpy()

    data = frame.loc[keep, columns]

    if len(data) == 0:
        raise SkipTestError("No valid data available for violin plot")

    # Melt the dataframe to long format: one (Feature, Value) row per cell
    melted_data = data.melt(var_name="Feature", value_name="Value")

    if use_group:
        # melt() stacks the columns one after another, each contributing the
        # same rows in the same order, so the labels repeat once per column.
        group_values = frame.loc[keep, group_by].tolist()
        melted_data["Group"] = group_values * len(columns)

    # Create violin plot
    shared_kwargs = {
        "x": "Feature",
        "y": "Value",
        "box": True,
        "width": width,
        "height": height,
    }
    if use_group:
        fig = px.violin(
            melted_data,
            color="Group",
            title=f"Distribution of Features by {group_by}",
            **shared_kwargs,
        )
    else:
        fig = px.violin(
            melted_data,
            title="Feature Distributions",
            **shared_kwargs,
        )

    # Update layout
    fig.update_layout(
        template="plotly_white",
        title_x=0.5,
        xaxis_title="Features",
        yaxis_title="Values",
    )

    # Rotate x-axis labels for better readability
    fig.update_xaxes(tickangle=45)

    return fig
|
File without changes
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -45,7 +47,9 @@ Prompt:
|
|
45
47
|
|
46
48
|
@tags("llm", "few_shot")
|
47
49
|
@tasks("text_classification", "text_summarization")
|
48
|
-
def Bias(
|
50
|
+
def Bias(
|
51
|
+
model, min_threshold=7, judge_llm=None
|
52
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
49
53
|
"""
|
50
54
|
Assesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the
|
51
55
|
prompt.
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -46,7 +48,9 @@ Prompt:
|
|
46
48
|
|
47
49
|
@tags("llm", "zero_shot", "few_shot")
|
48
50
|
@tasks("text_classification", "text_summarization")
|
49
|
-
def Clarity(
|
51
|
+
def Clarity(
|
52
|
+
model, min_threshold=7, judge_llm=None
|
53
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
50
54
|
"""
|
51
55
|
Evaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines.
|
52
56
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -54,7 +56,9 @@ Prompt:
|
|
54
56
|
|
55
57
|
@tags("llm", "zero_shot", "few_shot")
|
56
58
|
@tasks("text_classification", "text_summarization")
|
57
|
-
def Conciseness(
|
59
|
+
def Conciseness(
|
60
|
+
model, min_threshold=7, judge_llm=None
|
61
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
58
62
|
"""
|
59
63
|
Analyzes and grades the conciseness of prompts provided to a Large Language Model.
|
60
64
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -39,7 +41,9 @@ Prompt:
|
|
39
41
|
|
40
42
|
@tags("llm", "zero_shot", "few_shot")
|
41
43
|
@tasks("text_classification", "text_summarization")
|
42
|
-
def Delimitation(
|
44
|
+
def Delimitation(
|
45
|
+
model, min_threshold=7, judge_llm=None
|
46
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
43
47
|
"""
|
44
48
|
Evaluates the proper use of delimiters in prompts provided to Large Language Models.
|
45
49
|
|
@@ -2,6 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
6
|
+
|
5
7
|
from validmind import RawData, tags, tasks
|
6
8
|
from validmind.errors import MissingRequiredTestInputError
|
7
9
|
|
@@ -52,7 +54,9 @@ Prompt:
|
|
52
54
|
|
53
55
|
@tags("llm", "zero_shot", "few_shot")
|
54
56
|
@tasks("text_classification", "text_summarization")
|
55
|
-
def NegativeInstruction(
|
57
|
+
def NegativeInstruction(
|
58
|
+
model, min_threshold=7, judge_llm=None
|
59
|
+
) -> Tuple[List[Dict[str, Any]], bool, RawData]:
|
56
60
|
"""
|
57
61
|
Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
|
58
62
|
|