validmind 2.8.10__py3-none-any.whl → 2.8.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +4 -2
- validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
- validmind/tests/data_validation/AutoMA.py +1 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
- validmind/tests/data_validation/BoxPierce.py +3 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +1 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
- validmind/tests/data_validation/HighCardinality.py +5 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
- validmind/tests/data_validation/IQROutliersTable.py +5 -2
- validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
- validmind/tests/data_validation/JarqueBera.py +2 -2
- validmind/tests/data_validation/LJungBox.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/MissingValues.py +14 -10
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +2 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
- validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
- validmind/tests/data_validation/RollingStatsPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
- validmind/tests/data_validation/SeasonalDecompose.py +1 -1
- validmind/tests/data_validation/ShapiroWilk.py +2 -2
- validmind/tests/data_validation/SpreadPlot.py +1 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/TextDescription.py +1 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/model_validation/BertScore.py +7 -1
- validmind/tests/model_validation/BleuScore.py +7 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
- validmind/tests/model_validation/ContextualRecall.py +9 -1
- validmind/tests/model_validation/FeaturesAUC.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +7 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
- validmind/tests/model_validation/RegardScore.py +6 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
- validmind/tests/model_validation/RougeScore.py +3 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
- validmind/tests/model_validation/TokenDisparity.py +5 -1
- validmind/tests/model_validation/ToxicityScore.py +2 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +1 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +1 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +1 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +1 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +1 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +1 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +1 -1
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +15 -2
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
- validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
- validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +8 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +6 -1
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
- validmind/tests/prompt_validation/Clarity.py +1 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
- validmind/tests/prompt_validation/Robustness.py +6 -1
- validmind/tests/prompt_validation/Specificity.py +1 -1
- validmind/vm_models/result/utils.py +4 -23
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/METADATA +2 -2
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/RECORD +149 -149
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/LICENSE +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/WHEEL +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/entry_points.txt +0 -0
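
Most of the hunks below follow one mechanical pattern: a test module's import line gains `RawData` (`from validmind import RawData, tags, tasks`), and the test function returns a `RawData` bundle of its intermediate artifacts alongside the usual tables and figures, tagged with the `input_id` of the model and dataset that produced them. A minimal sketch of the new shape, using a hypothetical `MyMetric` test (the metric logic and decorator arguments are illustrative, not from the package; only the import and return pattern come from the hunks below):

    import pandas as pd

    from validmind import RawData, tags, tasks


    @tags("model_performance")
    @tasks("classification")
    def MyMetric(dataset, model):
        # Illustrative stand-in for the real per-test computation.
        result_df = pd.DataFrame({"Metric": ["Accuracy"], "Value": [0.97]})

        # 2.8.12 pattern: the display table plus a RawData bundle recording
        # the intermediate values and the inputs that produced them.
        return result_df, RawData(
            result_df=result_df,
            model=model.input_id,
            dataset=dataset.input_id,
        )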

validmind/tests/model_validation/MeteorScore.py
@@ -117,4 +117,10 @@ def MeteorScore(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (
+    return (
+        result_df,
+        *figures,
+        RawData(
+            meteor_scores=metrics_df, model=model.input_id, dataset=dataset.input_id
+        ),
+    )
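
The `*figures` in the rebuilt return is ordinary iterable unpacking inside a tuple display: a variable-length list of figures is flattened into the result tuple ahead of the trailing `RawData`. For example:

    figures = ["fig1", "fig2"]
    result = ("table", *figures, {"raw": True})
    print(result)  # ('table', 'fig1', 'fig2', {'raw': True})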

validmind/tests/model_validation/ModelPredictionResiduals.py
@@ -102,4 +102,8 @@ def ModelPredictionResiduals(
     # Create a summary DataFrame for the KS normality test results
     summary_df = pd.DataFrame([summary])
 
-    return (
+    return (
+        summary_df,
+        *figures,
+        RawData(residuals=residuals, model=model.input_id, dataset=dataset.input_id),
+    )

validmind/tests/model_validation/RegardScore.py
@@ -145,5 +145,10 @@ def RegardScore(dataset, model):
     return (
         result_df,
         *figures,
-        RawData(
+        RawData(
+            true_regard=true_df,
+            pred_regard=pred_df,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )

validmind/tests/model_validation/RegressionResidualsPlot.py
@@ -105,4 +105,13 @@ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float
         )
     )
 
-    return (
+    return (
+        *figures,
+        RawData(
+            residuals=residuals,
+            y_true=y_true,
+            y_pred=y_pred,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
+    )
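
Residuals in plots like this are conventionally actual minus predicted values; the hunk bundles `residuals`, `y_true`, and `y_pred` without showing the computation, which presumably reduces to an element-wise difference:

    import numpy as np

    y_true = np.array([3.0, 5.0, 2.0])
    y_pred = np.array([2.5, 5.5, 2.0])
    residuals = y_true - y_pred  # conventional definition: actual - predicted
    print(residuals)  # [ 0.5 -0.5  0. ]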

validmind/tests/model_validation/RougeScore.py
@@ -121,5 +121,7 @@ def RougeScore(dataset, model, metric="rouge-1"):
     return (
         pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"}),
         *figures,
-        RawData(
+        RawData(
+            rouge_scores_df=df_scores, model=model.input_id, dataset=dataset.input_id
+        ),
     )

validmind/tests/model_validation/TimeSeriesPredictionsPlot.py
@@ -4,7 +4,7 @@
 
 import plotly.graph_objects as go
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_predictions", "visualization")
@@ -70,4 +70,12 @@ def TimeSeriesPredictionsPlot(dataset, model):
         template="plotly_white",
     )
 
-
+    raw_data = RawData(
+        time_index=time_index,
+        actual_values=dataset.y,
+        predicted_values=y_pred,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
+
+    return fig, raw_data

validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py
@@ -7,7 +7,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn import metrics
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 
 
 @tags("model_performance", "sklearn")
@@ -105,4 +105,8 @@ def TimeSeriesR2SquareBySegments(dataset, model, segments=None):
         },
     )
 
-    return
+    return (
+        fig,
+        results_df,
+        RawData(summary=results_df, model=model.input_id, dataset=dataset.input_id),
+    )
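
This test imports `metrics` from sklearn, and per-segment R² presumably reduces to one `r2_score` call per slice of the series. A toy sketch (the segment split here is hypothetical):

    from sklearn.metrics import r2_score

    y_true = [3.0, 5.0, 2.0, 7.0]
    y_pred = [2.8, 5.1, 2.3, 6.5]

    # Hypothetical split of the series into two contiguous segments.
    segments = {"first_half": slice(0, 2), "second_half": slice(2, 4)}
    for name, segment in segments.items():
        print(name, r2_score(y_true[segment], y_pred[segment]))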

validmind/tests/model_validation/TokenDisparity.py
@@ -108,4 +108,8 @@ def TokenDisparity(dataset, model):
     # Create a DataFrame from all collected statistics
     result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
 
-    return (
+    return (
+        result_df,
+        *figures,
+        RawData(token_counts_df=df, model=model.input_id, dataset=dataset.input_id),
+    )

validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py
@@ -113,5 +113,9 @@ def CosineSimilarityComparison(dataset, models):
     return (
         *figures,
         stats_df,
-        RawData(
+        RawData(
+            similarity_matrices=pd.DataFrame(similarity_matrices),
+            dataset=dataset.input_id,
+            models=[model.input_id for model in models],
+        ),
     )

validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py
@@ -59,4 +59,8 @@ def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
         nbins=100,
         title="Cosine Similarity Distribution",
         labels={"x": "Cosine Similarity"},
-    ), RawData(
+    ), RawData(
+        similarity_scores=similarity_scores,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py
@@ -89,4 +89,8 @@ def EmbeddingsVisualization2D(
     fig = px.scatter(**scatter_kwargs)
     fig.update_layout(width=500, height=500)
 
-    return fig, RawData(
+    return fig, RawData(
+        tsne_embeddings=reduced_embeddings,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py
@@ -57,7 +57,7 @@ def EuclideanDistanceComparison(dataset, models):
     figures = []
     all_stats = []
 
-    distance_matrices =
+    distance_matrices = []
 
     # Generate all pairs of models for comparison
     for model_A, model_B in combinations(models, 2):
@@ -105,6 +105,10 @@ def EuclideanDistanceComparison(dataset, models):
     stats_df = pd.DataFrame(all_stats)
 
     # Add raw data to return
-    raw_data = RawData(
+    raw_data = RawData(
+        distance_matrices=pd.DataFrame(distance_matrices),
+        dataset=dataset.input_id,
+        models=[model.input_id for model in models],
+    )
 
     return (stats_df, *figures, raw_data)
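
The comparison loop pairs every model with every other model exactly once via `itertools.combinations`, so `distance_matrices` collects one entry per unordered pair:

    from itertools import combinations

    models = ["model_a", "model_b", "model_c"]
    print(list(combinations(models, 2)))
    # [('model_a', 'model_b'), ('model_a', 'model_c'), ('model_b', 'model_c')]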

validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py
@@ -97,4 +97,8 @@ def StabilityAnalysisKeyword(
         mean_similarity_threshold,
     )
 
-    return results, RawData(
+    return results, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py
@@ -151,4 +151,8 @@ def StabilityAnalysisRandomNoise(
         mean_similarity_threshold,
     )
 
-    return *result, RawData(
+    return *result, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py
@@ -107,4 +107,8 @@ def StabilityAnalysisSynonyms(
         mean_similarity_threshold,
     )
 
-    return *result, RawData(
+    return *result, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py
@@ -134,4 +134,8 @@ def StabilityAnalysisTranslation(
         mean_similarity_threshold,
     )
 
-    return *result, RawData(
+    return *result, RawData(
+        original_perturbed_similarity=raw_data,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py
@@ -110,5 +110,10 @@ def TSNEComponentsPairwisePlots(
 
     return (
         *figures,
-        RawData(
+        RawData(
+            embeddings_scaled=embeddings_scaled,
+            tsne_results=tsne_results,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
     )
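
The `embeddings_scaled` and `tsne_results` names suggest the usual scale-then-project pipeline; a standalone sketch of that pattern, assuming sklearn's `StandardScaler` and `TSNE` (the hunk itself does not show these calls):

    import numpy as np
    from sklearn.manifold import TSNE
    from sklearn.preprocessing import StandardScaler

    embeddings = np.random.RandomState(0).rand(50, 16)  # toy embeddings

    embeddings_scaled = StandardScaler().fit_transform(embeddings)
    tsne_results = TSNE(n_components=2, perplexity=5, random_state=0).fit_transform(
        embeddings_scaled
    )
    print(tsne_results.shape)  # (50, 2)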

validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py
@@ -4,7 +4,7 @@
 
 from sklearn.metrics import adjusted_mutual_info_score
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -52,11 +52,11 @@ def AdjustedMutualInformation(model: VMModel, dataset: VMDataset):
     - The interpretability of the score can be complex as it depends on the understanding of information theory
     concepts.
     """
-
-
-
-
-
-
-
-
+    ami_score = adjusted_mutual_info_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+
+    return [{"Adjusted Mutual Information": ami_score}], RawData(
+        ami_score=ami_score, model=model.input_id, dataset=dataset.input_id
+    )

validmind/tests/model_validation/sklearn/AdjustedRandIndex.py
@@ -4,7 +4,7 @@
 
 from sklearn.metrics import adjusted_rand_score
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -49,11 +49,11 @@ def AdjustedRandIndex(model: VMModel, dataset: VMDataset):
     - It may be difficult to interpret the implications of an ARI score without context or a benchmark, as it is
     heavily dependent on the characteristics of the dataset used.
     """
-
-
-
-
-
-
-
-
+    ari = adjusted_rand_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+
+    return [{"Adjusted Rand Index": ari}], RawData(
+        ari_score=ari, model=model.input_id, dataset=dataset.input_id
+    )

validmind/tests/model_validation/sklearn/CalibrationCurve.py
@@ -72,7 +72,10 @@ def CalibrationCurve(model: VMModel, dataset: VMDataset, n_bins: int = 10):
 
     # Create DataFrame for raw data
     raw_data = RawData(
-        mean_predicted_probability=prob_pred,
+        mean_predicted_probability=prob_pred,
+        observed_frequency=prob_true,
+        model=model.input_id,
+        dataset=dataset.input_id,
     )
 
     # Create Plotly figure
@@ -114,4 +117,4 @@ def CalibrationCurve(model: VMModel, dataset: VMDataset, n_bins: int = 10):
         template="plotly_white",
     )
 
-    return
+    return fig, raw_data
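
`prob_true` and `prob_pred` match the outputs of sklearn's calibration helper, so the test presumably computes them along these lines (toy labels and probabilities; the actual call sits outside the hunk):

    import numpy as np
    from sklearn.calibration import calibration_curve

    y_true = np.array([0, 0, 0, 1, 0, 1, 1, 1])
    y_prob = np.array([0.1, 0.2, 0.4, 0.35, 0.6, 0.7, 0.8, 0.9])

    # Observed event frequency vs. mean predicted probability, per bin.
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=4)
    print(prob_true, prob_pred)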

validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py
@@ -8,7 +8,7 @@ import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_curve
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -255,4 +255,17 @@ def ClassifierThresholdOptimization(
     # Create results table and sort by threshold descending
     table = pd.DataFrame(results).sort_values("threshold", ascending=False)
 
-    return
+    return (
+        fig,
+        table,
+        RawData(
+            fpr=fpr,
+            tpr=tpr,
+            precision=precision,
+            recall=recall,
+            thresholds_roc=thresholds_roc,
+            thresholds_pr=thresholds_pr,
+            model=model.input_id,
+            dataset=dataset.input_id,
+        ),
+    )
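
The arrays captured in the new `RawData` (`fpr`, `tpr`, `precision`, `recall`, and the two threshold vectors) are the standard outputs of the sklearn curve helpers imported at the top of this file; a standalone sketch with toy scores:

    import numpy as np
    from sklearn.metrics import precision_recall_curve, roc_curve

    y_true = np.array([0, 0, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])

    # ROC: false/true positive rate at each candidate threshold.
    fpr, tpr, thresholds_roc = roc_curve(y_true, y_score)

    # PR: precision/recall pairs; thresholds_pr has one fewer element.
    precision, recall, thresholds_pr = precision_recall_curve(y_true, y_score)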

validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py
@@ -84,4 +84,8 @@ def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset):
     if not table:
         raise SkipTestError("No clusters found")
 
-    return table, RawData(
+    return table, RawData(
+        cluster_centroids=cluster_centroids,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )

validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py
@@ -11,7 +11,7 @@ from sklearn.metrics import (
     v_measure_score,
 )
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 HOMOGENEITY = """
@@ -115,53 +115,63 @@ def ClusterPerformanceMetrics(model: VMModel, dataset: VMDataset):
     - Does not consider aspects like computational efficiency of the model or its capability to handle high dimensional
     data.
     """
-
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    metrics = [
         {
             "Metric": "Homogeneity Score",
             "Description": HOMOGENEITY,
             "Value": homogeneity_score(
-                labels_true=
-                labels_pred=
+                labels_true=y_true,
+                labels_pred=y_pred,
             ),
         },
         {
             "Metric": "Completeness Score",
             "Description": COMPLETENESS,
             "Value": completeness_score(
-                labels_true=
-                labels_pred=
+                labels_true=y_true,
+                labels_pred=y_pred,
             ),
         },
         {
             "Metric": "V Measure",
             "Description": V_MEASURE,
             "Value": v_measure_score(
-                labels_true=
-                labels_pred=
+                labels_true=y_true,
+                labels_pred=y_pred,
             ),
         },
         {
             "Metric": "Adjusted Rand Index",
             "Description": ADJUSTED_RAND_INDEX,
             "Value": adjusted_rand_score(
-                labels_true=
-                labels_pred=
+                labels_true=y_true,
+                labels_pred=y_pred,
             ),
         },
         {
             "Metric": "Adjusted Mutual Information",
             "Description": ADJUSTED_MUTUAL_INFORMATION,
             "Value": adjusted_mutual_info_score(
-                labels_true=
-                labels_pred=
+                labels_true=y_true,
+                labels_pred=y_pred,
             ),
         },
         {
             "Metric": "Fowlkes-Mallows score",
             "Description": FOULKES_MALLOWS_SCORE,
             "Value": fowlkes_mallows_score(
-                labels_true=
-                labels_pred=
+                labels_true=y_true,
+                labels_pred=y_pred,
             ),
         },
     ]
+
+    return metrics, RawData(
+        true_labels=y_true,
+        predicted_labels=y_pred,
+        model=model.input_id,
+        dataset=dataset.input_id,
+    )
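
All six metrics assembled above share sklearn's `(labels_true, labels_pred)` signature, which is what makes the `y_true`/`y_pred` refactor a pure deduplication. A quick standalone check on toy labels:

    from sklearn.metrics import (
        adjusted_mutual_info_score,
        adjusted_rand_score,
        completeness_score,
        fowlkes_mallows_score,
        homogeneity_score,
        v_measure_score,
    )

    y_true = [0, 0, 0, 1, 1, 1]
    y_pred = [0, 0, 1, 1, 1, 1]  # one point assigned to the wrong cluster

    for name, metric in [
        ("Homogeneity Score", homogeneity_score),
        ("Completeness Score", completeness_score),
        ("V Measure", v_measure_score),
        ("Adjusted Rand Index", adjusted_rand_score),
        ("Adjusted Mutual Information", adjusted_mutual_info_score),
        ("Fowlkes-Mallows score", fowlkes_mallows_score),
    ]:
        print(name, round(metric(labels_true=y_true, labels_pred=y_pred), 3))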

validmind/tests/model_validation/sklearn/CompletenessScore.py
@@ -4,7 +4,7 @@
 
 from sklearn.metrics import completeness_score
 
-from validmind import tags, tasks
+from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset, VMModel
 
 
@@ -47,11 +47,10 @@ def CompletenessScore(model: VMModel, dataset: VMDataset):
     - The Completeness Score only applies to clustering models; it cannot be used for other types of machine learning
     models.
     """
-
-
-
-
-
-
-
-    ]
+    score = completeness_score(
+        labels_true=dataset.y,
+        labels_pred=dataset.y_pred(model),
+    )
+    return [{"Completeness Score": score}], RawData(
+        score=score, model=model.input_id, dataset=dataset.input_id
+    )
|