validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.8.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
@@ -2,22 +2,22 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from dataclasses import dataclass
|
6
|
-
|
7
5
|
import numpy as np
|
8
6
|
import pandas as pd
|
9
7
|
import plotly.graph_objects as go
|
10
8
|
from sklearn.metrics import roc_auc_score
|
11
9
|
|
10
|
+
from validmind import tags, tasks
|
12
11
|
from validmind.errors import SkipTestError
|
13
12
|
from validmind.logging import get_logger
|
14
|
-
from validmind.vm_models import
|
13
|
+
from validmind.vm_models import VMDataset
|
15
14
|
|
16
15
|
logger = get_logger(__name__)
|
17
16
|
|
18
17
|
|
19
|
-
@
|
20
|
-
|
18
|
+
@tags("feature_importance", "AUC", "visualization")
|
19
|
+
@tasks("classification")
|
20
|
+
def FeaturesAUC(dataset: VMDataset, fontsize: int = 12, figure_height: int = 500):
|
21
21
|
"""
|
22
22
|
Evaluates the discriminatory power of each individual feature within a binary classification model by calculating
|
23
23
|
the Area Under the Curve (AUC) for each feature separately.
|
@@ -57,73 +57,42 @@ class FeaturesAUC(Metric):
|
|
57
57
|
- This metric is applicable only to binary classification tasks and cannot be directly extended to multiclass
|
58
58
|
classification or regression without modifications.
|
59
59
|
"""
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
aucs = pd.DataFrame(index=x.columns, columns=["AUC"])
|
84
|
-
|
85
|
-
for column in x.columns:
|
86
|
-
feature_values = x[column]
|
87
|
-
if feature_values.nunique() > 1:
|
88
|
-
auc_score = roc_auc_score(y, feature_values)
|
89
|
-
aucs.loc[column, "AUC"] = auc_score
|
90
|
-
else:
|
91
|
-
aucs.loc[
|
92
|
-
column, "AUC"
|
93
|
-
] = np.nan # Not enough unique values to calculate AUC
|
94
|
-
|
95
|
-
# Sorting the AUC scores in descending order
|
96
|
-
sorted_indices = aucs["AUC"].dropna().sort_values(ascending=False).index
|
97
|
-
|
98
|
-
# Plotting the results
|
99
|
-
fig = go.Figure()
|
100
|
-
fig.add_trace(
|
101
|
-
go.Bar(
|
102
|
-
y=[column for column in sorted_indices],
|
103
|
-
x=[aucs.loc[column, "AUC"] for column in sorted_indices],
|
104
|
-
orientation="h",
|
105
|
-
)
|
106
|
-
)
|
107
|
-
fig.update_layout(
|
108
|
-
title_text="Feature AUC Scores",
|
109
|
-
yaxis=dict(
|
110
|
-
tickmode="linear",
|
111
|
-
dtick=1,
|
112
|
-
tickfont=dict(size=self.params["fontsize"]),
|
113
|
-
title="Features",
|
114
|
-
autorange="reversed", # Ensure that the highest AUC is at the top
|
115
|
-
),
|
116
|
-
xaxis=dict(title="AUC"),
|
117
|
-
height=self.params["figure_height"],
|
118
|
-
)
|
119
|
-
|
120
|
-
return self.cache_results(
|
121
|
-
metric_value=aucs.to_dict(),
|
122
|
-
figures=[
|
123
|
-
Figure(
|
124
|
-
for_object=self,
|
125
|
-
key="features_auc",
|
126
|
-
figure=fig,
|
127
|
-
),
|
128
|
-
],
|
60
|
+
if len(np.unique(dataset.y)) != 2:
|
61
|
+
raise SkipTestError("FeaturesAUC metric requires a binary target variable.")
|
62
|
+
|
63
|
+
aucs = pd.DataFrame(index=dataset.feature_columns, columns=["AUC"])
|
64
|
+
|
65
|
+
for column in dataset.feature_columns:
|
66
|
+
feature_values = dataset.df[column]
|
67
|
+
if feature_values.nunique() > 1 and pd.api.types.is_numeric_dtype(
|
68
|
+
feature_values
|
69
|
+
):
|
70
|
+
aucs.loc[column, "AUC"] = roc_auc_score(dataset.y, feature_values)
|
71
|
+
else:
|
72
|
+
# Not enough unique values to calculate AUC
|
73
|
+
aucs.loc[column, "AUC"] = np.nan
|
74
|
+
|
75
|
+
sorted_indices = aucs["AUC"].dropna().sort_values(ascending=False).index
|
76
|
+
|
77
|
+
fig = go.Figure()
|
78
|
+
fig.add_trace(
|
79
|
+
go.Bar(
|
80
|
+
y=[column for column in sorted_indices],
|
81
|
+
x=[aucs.loc[column, "AUC"] for column in sorted_indices],
|
82
|
+
orientation="h",
|
129
83
|
)
|
84
|
+
)
|
85
|
+
fig.update_layout(
|
86
|
+
title_text="Feature AUC Scores",
|
87
|
+
yaxis=dict(
|
88
|
+
tickmode="linear",
|
89
|
+
dtick=1,
|
90
|
+
tickfont=dict(size=fontsize),
|
91
|
+
title="Features",
|
92
|
+
autorange="reversed", # Ensure that the highest AUC is at the top
|
93
|
+
),
|
94
|
+
xaxis=dict(title="AUC"),
|
95
|
+
height=figure_height,
|
96
|
+
)
|
97
|
+
|
98
|
+
return fig
|
@@ -7,6 +7,7 @@ import pandas as pd
|
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
9
|
from validmind import tags, tasks
|
10
|
+
from validmind.tests.utils import validate_prediction
|
10
11
|
|
11
12
|
|
12
13
|
@tags("nlp", "text_data", "visualization")
|
@@ -65,6 +66,8 @@ def MeteorScore(dataset, model):
|
|
65
66
|
y_true = dataset.y
|
66
67
|
y_pred = dataset.y_pred(model)
|
67
68
|
|
69
|
+
validate_prediction(y_true, y_pred)
|
70
|
+
|
68
71
|
# Load the METEOR evaluation metric
|
69
72
|
meteor = evaluate.load("meteor")
|
70
73
|
|
@@ -7,6 +7,7 @@ import pandas as pd
|
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
9
|
from validmind import tags, tasks
|
10
|
+
from validmind.tests.utils import validate_prediction
|
10
11
|
|
11
12
|
|
12
13
|
@tags("nlp", "text_data", "visualization")
|
@@ -56,8 +57,11 @@ def RegardScore(dataset, model):
|
|
56
57
|
y_true = dataset.y
|
57
58
|
y_pred = dataset.y_pred(model)
|
58
59
|
|
60
|
+
# Ensure equal lengths and get truncated data if necessary
|
61
|
+
y_true, y_pred = validate_prediction(y_true, y_pred)
|
62
|
+
|
59
63
|
# Load the regard evaluation metric
|
60
|
-
regard_tool = evaluate.load("regard")
|
64
|
+
regard_tool = evaluate.load("regard", module_type="measurement")
|
61
65
|
|
62
66
|
# Function to calculate regard scores
|
63
67
|
def compute_regard_scores(texts):
|
@@ -2,17 +2,17 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from dataclasses import dataclass
|
6
|
-
|
7
5
|
import numpy as np
|
8
6
|
import plotly.figure_factory as ff
|
9
7
|
import plotly.graph_objects as go
|
10
8
|
|
11
|
-
from validmind
|
9
|
+
from validmind import tags, tasks
|
10
|
+
from validmind.vm_models import VMDataset, VMModel
|
12
11
|
|
13
12
|
|
14
|
-
@
|
15
|
-
|
13
|
+
@tags("model_performance", "visualization")
|
14
|
+
@tasks("regression")
|
15
|
+
def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float = 0.1):
|
16
16
|
"""
|
17
17
|
Evaluates regression model performance using residual distribution and actual vs. predicted plots.
|
18
18
|
|
@@ -54,75 +54,54 @@ class RegressionResidualsPlot(Metric):
|
|
54
54
|
- Does not summarize model performance into a single quantifiable metric, which might be needed for comparative or
|
55
55
|
summary analyses.
|
56
56
|
"""
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
name="Perfect Fit",
|
105
|
-
line=dict(color="red", dash="dash"),
|
57
|
+
y_true = dataset.y
|
58
|
+
y_pred = dataset.y_pred(model)
|
59
|
+
|
60
|
+
figures = []
|
61
|
+
|
62
|
+
# Residuals plot
|
63
|
+
fig = ff.create_distplot(
|
64
|
+
hist_data=[y_true.flatten() - y_pred.flatten()],
|
65
|
+
group_labels=["Residuals"],
|
66
|
+
bin_size=[bin_size],
|
67
|
+
show_hist=True,
|
68
|
+
show_rug=False,
|
69
|
+
)
|
70
|
+
fig.update_layout(
|
71
|
+
title="Distribution of Residuals",
|
72
|
+
xaxis_title="Residuals",
|
73
|
+
yaxis_title="Density",
|
74
|
+
)
|
75
|
+
figures.append(fig)
|
76
|
+
|
77
|
+
# True vs Predicted w/ perfect fit line plot
|
78
|
+
max_val = np.nanmax([np.nanmax(y_true), np.nanmax(y_pred)])
|
79
|
+
min_val = np.nanmin([np.nanmin(y_true), np.nanmin(y_pred)])
|
80
|
+
figures.append(
|
81
|
+
go.Figure(
|
82
|
+
data=[
|
83
|
+
go.Scatter(
|
84
|
+
x=y_true.flatten(),
|
85
|
+
y=y_pred.flatten(),
|
86
|
+
mode="markers",
|
87
|
+
name="True vs Predicted",
|
88
|
+
marker=dict(color="blue", opacity=0.5),
|
89
|
+
),
|
90
|
+
go.Scatter(
|
91
|
+
x=[min_val, max_val],
|
92
|
+
y=[min_val, max_val],
|
93
|
+
mode="lines",
|
94
|
+
name="Perfect Fit",
|
95
|
+
line=dict(color="red", dash="dash"),
|
96
|
+
),
|
97
|
+
],
|
98
|
+
layout=go.Layout(
|
99
|
+
title="True vs. Predicted Values",
|
100
|
+
xaxis_title="True Values",
|
101
|
+
yaxis_title="Predicted Values",
|
102
|
+
showlegend=True,
|
103
|
+
),
|
106
104
|
)
|
105
|
+
)
|
107
106
|
|
108
|
-
|
109
|
-
layout = go.Layout(
|
110
|
-
title="True vs. Predicted Values",
|
111
|
-
xaxis_title="True Values",
|
112
|
-
yaxis_title="Predicted Values",
|
113
|
-
showlegend=True,
|
114
|
-
)
|
115
|
-
|
116
|
-
fig = go.Figure(data=[scatter, line], layout=layout)
|
117
|
-
|
118
|
-
figures.append(
|
119
|
-
Figure(
|
120
|
-
for_object=self,
|
121
|
-
key=self.key,
|
122
|
-
figure=fig,
|
123
|
-
)
|
124
|
-
)
|
125
|
-
|
126
|
-
return self.cache_results(
|
127
|
-
figures=figures,
|
128
|
-
)
|
107
|
+
return tuple(figures)
|
@@ -5,10 +5,13 @@
|
|
5
5
|
import plotly.express as px
|
6
6
|
from sklearn.cluster import KMeans
|
7
7
|
|
8
|
-
from validmind
|
8
|
+
from validmind import tags, tasks
|
9
|
+
from validmind.vm_models import VMDataset, VMModel
|
9
10
|
|
10
11
|
|
11
|
-
|
12
|
+
@tags("llm", "text_data", "embeddings", "visualization")
|
13
|
+
@tasks("feature_extraction")
|
14
|
+
def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int = 5):
|
12
15
|
"""
|
13
16
|
Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.
|
14
17
|
|
@@ -49,34 +52,8 @@ class ClusterDistribution(Metric):
|
|
49
52
|
- Uses the KMeans clustering algorithm, which assumes that clusters are convex and isotropic, and may not work as
|
50
53
|
intended if the true clusters in the data are not of this shape.
|
51
54
|
"""
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
}
|
58
|
-
tasks = ["feature_extraction"]
|
59
|
-
tags = ["llm", "text_data", "embeddings", "visualization"]
|
60
|
-
|
61
|
-
def run(self):
|
62
|
-
# run kmeans clustering on embeddings
|
63
|
-
kmeans = KMeans(n_clusters=self.params["num_clusters"]).fit(
|
64
|
-
self.inputs.dataset.y_pred(self.inputs.model)
|
65
|
-
)
|
66
|
-
|
67
|
-
# plot the distribution
|
68
|
-
fig = px.histogram(
|
69
|
-
kmeans.labels_,
|
70
|
-
nbins=self.params["num_clusters"],
|
71
|
-
title="Embeddings Cluster Distribution",
|
72
|
-
)
|
73
|
-
|
74
|
-
return self.cache_results(
|
75
|
-
figures=[
|
76
|
-
Figure(
|
77
|
-
for_object=self,
|
78
|
-
key=self.key,
|
79
|
-
figure=fig,
|
80
|
-
)
|
81
|
-
],
|
82
|
-
)
|
55
|
+
return px.histogram(
|
56
|
+
KMeans(n_clusters=num_clusters).fit(dataset.y_pred(model)).labels_,
|
57
|
+
nbins=num_clusters,
|
58
|
+
title="Embeddings Cluster Distribution",
|
59
|
+
)
|
@@ -5,10 +5,13 @@
|
|
5
5
|
import plotly.express as px
|
6
6
|
from sklearn.metrics.pairwise import cosine_similarity
|
7
7
|
|
8
|
-
from validmind
|
8
|
+
from validmind import tags, tasks
|
9
|
+
from validmind.vm_models import VMDataset, VMModel
|
9
10
|
|
10
11
|
|
11
|
-
|
12
|
+
@tags("llm", "text_data", "embeddings", "visualization")
|
13
|
+
@tasks("feature_extraction")
|
14
|
+
def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
|
12
15
|
"""
|
13
16
|
Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution
|
14
17
|
histogram.
|
@@ -49,30 +52,9 @@ class CosineSimilarityDistribution(Metric):
|
|
49
52
|
- The output is sensitive to the choice of bin number for the histogram. Different bin numbers could give a
|
50
53
|
slightly altered perspective on the distribution of cosine similarity.
|
51
54
|
"""
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
def run(self):
|
59
|
-
# Compute cosine similarity
|
60
|
-
similarities = cosine_similarity(self.inputs.dataset.y_pred(self.inputs.model))
|
61
|
-
|
62
|
-
# plot the distribution
|
63
|
-
fig = px.histogram(
|
64
|
-
x=similarities.flatten(),
|
65
|
-
nbins=100,
|
66
|
-
title="Cosine Similarity Distribution",
|
67
|
-
labels={"x": "Cosine Similarity"},
|
68
|
-
)
|
69
|
-
|
70
|
-
return self.cache_results(
|
71
|
-
figures=[
|
72
|
-
Figure(
|
73
|
-
for_object=self,
|
74
|
-
key=self.key,
|
75
|
-
figure=fig,
|
76
|
-
)
|
77
|
-
],
|
78
|
-
)
|
55
|
+
return px.histogram(
|
56
|
+
x=cosine_similarity(dataset.y_pred(model)).flatten(),
|
57
|
+
nbins=100,
|
58
|
+
title="Cosine Similarity Distribution",
|
59
|
+
labels={"x": "Cosine Similarity"},
|
60
|
+
)
|
@@ -5,10 +5,13 @@
|
|
5
5
|
import numpy as np
|
6
6
|
import plotly.express as px
|
7
7
|
|
8
|
-
from validmind
|
8
|
+
from validmind import tags, tasks
|
9
|
+
from validmind.vm_models import VMDataset, VMModel
|
9
10
|
|
10
11
|
|
11
|
-
|
12
|
+
@tags("llm", "text_data", "embeddings", "visualization")
|
13
|
+
@tasks("feature_extraction")
|
14
|
+
def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
|
12
15
|
"""
|
13
16
|
Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation
|
14
17
|
histograms.
|
@@ -52,32 +55,17 @@ class DescriptiveAnalytics(Metric):
|
|
52
55
|
- While it displays valuable information about the central tendency and spread of data, it does not provide
|
53
56
|
information about correlations between different embedding dimensions.
|
54
57
|
"""
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
# Plot histograms of the calculated statistics
|
71
|
-
mean_fig = px.histogram(x=means, title="Distribution of Embedding Means")
|
72
|
-
median_fig = px.histogram(x=medians, title="Distribution of Embedding Medians")
|
73
|
-
std_fig = px.histogram(
|
74
|
-
x=stds, title="Distribution of Embedding Standard Deviations"
|
75
|
-
)
|
76
|
-
|
77
|
-
return self.cache_results(
|
78
|
-
figures=[
|
79
|
-
Figure(for_object=self, key=f"{self.key}_mean", figure=mean_fig),
|
80
|
-
Figure(for_object=self, key=f"{self.key}_median", figure=median_fig),
|
81
|
-
Figure(for_object=self, key=f"{self.key}_std", figure=std_fig),
|
82
|
-
],
|
83
|
-
)
|
58
|
+
return (
|
59
|
+
px.histogram(
|
60
|
+
x=np.mean(dataset.y_pred(model), axis=0),
|
61
|
+
title="Distribution of Embedding Means",
|
62
|
+
),
|
63
|
+
px.histogram(
|
64
|
+
x=np.median(dataset.y_pred(model), axis=0),
|
65
|
+
title="Distribution of Embedding Medians",
|
66
|
+
),
|
67
|
+
px.histogram(
|
68
|
+
x=np.std(dataset.y_pred(model), axis=0),
|
69
|
+
title="Distribution of Embedding Standard Deviations",
|
70
|
+
),
|
71
|
+
)
|
@@ -2,13 +2,26 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
from typing import Union
|
6
|
+
|
5
7
|
import plotly.express as px
|
6
8
|
from sklearn.manifold import TSNE
|
7
9
|
|
8
|
-
from validmind
|
10
|
+
from validmind import tags, tasks
|
11
|
+
from validmind.logging import get_logger
|
12
|
+
from validmind.vm_models import VMDataset, VMModel
|
13
|
+
|
14
|
+
logger = get_logger(__name__)
|
9
15
|
|
10
16
|
|
11
|
-
|
17
|
+
@tags("llm", "text_data", "embeddings", "visualization")
|
18
|
+
@tasks("feature_extraction")
|
19
|
+
def EmbeddingsVisualization2D(
|
20
|
+
model: VMModel,
|
21
|
+
dataset: VMDataset,
|
22
|
+
cluster_column: Union[str, None] = None,
|
23
|
+
perplexity: int = 30,
|
24
|
+
):
|
12
25
|
"""
|
13
26
|
Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.
|
14
27
|
|
@@ -50,52 +63,30 @@ class EmbeddingsVisualization2D(Metric):
|
|
50
63
|
- It is designed for visual exploration and not for downstream tasks; that is, the 2D embeddings generated should
|
51
64
|
not be directly used for further training or analysis.
|
52
65
|
"""
|
66
|
+
y_pred = dataset.y_pred(model)
|
67
|
+
|
68
|
+
num_samples = len(y_pred)
|
69
|
+
perplexity = perplexity if perplexity < num_samples else num_samples - 1
|
70
|
+
|
71
|
+
reduced_embeddings = TSNE(
|
72
|
+
n_components=2,
|
73
|
+
perplexity=perplexity,
|
74
|
+
).fit_transform(y_pred)
|
53
75
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
"cluster_column"
|
58
|
-
|
76
|
+
if not cluster_column and len(dataset.feature_columns_categorical) == 1:
|
77
|
+
cluster_column = dataset.feature_columns_categorical[0]
|
78
|
+
else:
|
79
|
+
logger.warning("Cannot color code embeddings without a 'cluster_column' param.")
|
80
|
+
|
81
|
+
scatter_kwargs = {
|
82
|
+
"x": reduced_embeddings[:, 0],
|
83
|
+
"y": reduced_embeddings[:, 1],
|
84
|
+
"title": "2D Visualization of Text Embeddings",
|
59
85
|
}
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
raise ValueError(
|
68
|
-
"The `cluster_column` parameter must be provided to the EmbeddingsVisualization2D test."
|
69
|
-
)
|
70
|
-
|
71
|
-
# use TSNE to reduce dimensionality of embeddings
|
72
|
-
num_samples = len(self.inputs.dataset.y_pred(self.inputs.model))
|
73
|
-
|
74
|
-
if self.params["perplexity"] >= num_samples:
|
75
|
-
perplexity = num_samples - 1
|
76
|
-
else:
|
77
|
-
perplexity = self.params["perplexity"]
|
78
|
-
|
79
|
-
reduced_embeddings = TSNE(
|
80
|
-
n_components=2,
|
81
|
-
perplexity=perplexity,
|
82
|
-
).fit_transform(self.inputs.model.y_test_predict.values)
|
83
|
-
|
84
|
-
# create a scatter plot from the reduced embeddings
|
85
|
-
fig = px.scatter(
|
86
|
-
x=reduced_embeddings[:, 0],
|
87
|
-
y=reduced_embeddings[:, 1],
|
88
|
-
color=self.inputs.dataset.df[cluster_column],
|
89
|
-
title="2D Visualization of Text Embeddings",
|
90
|
-
)
|
91
|
-
fig.update_layout(width=500, height=500)
|
92
|
-
|
93
|
-
return self.cache_results(
|
94
|
-
figures=[
|
95
|
-
Figure(
|
96
|
-
for_object=self,
|
97
|
-
key=self.key,
|
98
|
-
figure=fig,
|
99
|
-
)
|
100
|
-
],
|
101
|
-
)
|
86
|
+
if cluster_column:
|
87
|
+
scatter_kwargs["color"] = dataset.df[cluster_column]
|
88
|
+
|
89
|
+
fig = px.scatter(**scatter_kwargs)
|
90
|
+
fig.update_layout(width=500, height=500)
|
91
|
+
|
92
|
+
return fig
|