validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported public registries. It is provided for informational purposes only.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.24.dist-info/METADATA +0 -118
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/sklearn/AdjustedRandIndex.py

@@ -2,15 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from …
+from sklearn.metrics import adjusted_rand_score
 
-from …
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
-from .ClusterPerformance import ClusterPerformance
 
-
-@…
-…
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def AdjustedRandIndex(model: VMModel, dataset: VMDataset):
     """
     Measures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine
     learning models.
@@ -49,14 +49,11 @@ class AdjustedRandIndex(ClusterPerformance):
     - It may be difficult to interpret the implications of an ARI score without context or a benchmark, as it is
     heavily dependent on the characteristics of the dataset used.
     """
-    …
+    return [
+        {
+            "Adjusted Rand Index": adjusted_rand_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            )
+        }
     ]
-
-    def metric_info(self):
-        return {"Adjusted Rand Index": metrics.adjusted_rand_score}
validmind/tests/model_validation/sklearn/ClassifierPerformance.py

@@ -2,24 +2,25 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
 import numpy as np
 from sklearn.metrics import classification_report, roc_auc_score
 from sklearn.preprocessing import LabelBinarizer
 
-from validmind …
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
     lb = LabelBinarizer()
     lb.fit(y_test)
-
     return roc_auc_score(lb.transform(y_test), lb.transform(y_pred), average=average)
 
 
-@…
-…
+@tags(
+    "sklearn", "binary_classification", "multiclass_classification", "model_performance"
+)
+@tasks("classification", "text_classification")
+def ClassifierPerformance(dataset: VMDataset, model: VMModel, average: str = "macro"):
     """
     Evaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,
     and ROC AUC scores.
@@ -57,92 +58,53 @@ class ClassifierPerformance(Metric):
     - Specifically designed for classification models and not suitable for regression models.
     - May provide limited insights if the test dataset does not represent real-world scenarios adequately.
     """
-    …
+    y_pred = dataset.y_pred(model)
+    y_true = dataset.y
+
+    labels = np.unique(y_true)
+    labels = sorted(labels.tolist())
+
+    report = classification_report(
+        y_true=y_true,
+        y_pred=y_pred,
+        output_dict=True,
+        zero_division=0,
+    )
+
+    if len(labels) > 2:
+        y_true = y_true.astype(y_pred.dtype)
+        roc_auc = multiclass_roc_auc_score(y_true, y_pred, average=average)
+    else:
+        y_prob = dataset.y_prob(model)
+        y_true = y_true.astype(y_prob.dtype).flatten()
+        roc_auc = roc_auc_score(y_true, y_prob, average=average)
+
+    report["roc_auc"] = roc_auc
+
+    pr_f1_table = [
+        {
+            "Class": f"{class_name}",
+            "Precision": report[f"{class_name}"]["precision"],
+            "Recall": report[f"{class_name}"]["recall"],
+            "F1": report[f"{class_name}"]["f1-score"],
+        }
+        for class_name in labels
     ]
-    default_params = {"average": "macro"}
-
-    def summary(self, metric_value: dict):
-        """
-        When building a multi-class summary we need to calculate weighted average,
-        macro average and per class metrics.
-        """
-        classes = {str(i) for i in np.unique(self.inputs.dataset.y)}
-        pr_f1_table = [
-            {
-                "Class": class_name,
-                "Precision": metric_value[class_name]["precision"],
-                "Recall": metric_value[class_name]["recall"],
-                "F1": metric_value[class_name]["f1-score"],
-            }
-            for class_name in classes
-        ]
-        pr_f1_table.extend(
-            [
-                {
-                    "Class": "Weighted Average",
-                    "Precision": metric_value["weighted avg"]["precision"],
-                    "Recall": metric_value["weighted avg"]["recall"],
-                    "F1": metric_value["weighted avg"]["f1-score"],
-                },
-                {
-                    "Class": "Macro Average",
-                    "Precision": metric_value["macro avg"]["precision"],
-                    "Recall": metric_value["macro avg"]["recall"],
-                    "F1": metric_value["macro avg"]["f1-score"],
-                },
-            ]
-        )
 
-
+    for avg in ["weighted avg", "macro avg"]:
+        pr_f1_table.append(
             {
-                "…
-                "…
+                "Class": avg.replace("avg", "Average").title(),
+                "Precision": report[avg]["precision"],
+                "Recall": report[avg]["recall"],
+                "F1": report[avg]["f1-score"],
             }
-            for metric_name in ["accuracy", "roc_auc"]
-        ]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=pr_f1_table,
-                    metadata=ResultTableMetadata(title="Precision, Recall, and F1"),
-                ),
-                ResultTable(
-                    data=acc_roc_auc_table,
-                    metadata=ResultTableMetadata(title="Accuracy and ROC AUC"),
-                ),
-            ]
         )
 
-    …
-        y_true = self.inputs.dataset.y
-
-        if len(np.unique(y_true)) > 2:
-            y_pred = self.inputs.dataset.y_pred(self.inputs.model)
-            y_true = y_true.astype(y_pred.dtype)
-            roc_auc = multiclass_roc_auc_score(
-                y_true, y_pred, average=self.params["average"]
-            )
-        else:
-            y_prob = self.inputs.dataset.y_prob(self.inputs.model)
-            y_true = y_true.astype(y_prob.dtype).flatten()
-            roc_auc = roc_auc_score(y_true, y_prob, average=self.params["average"])
-
-        report["roc_auc"] = roc_auc
-
-        return self.cache_results(report)
+    return {
+        "Precision, Recall, and F1": pr_f1_table,
+        "Accuracy and ROC AUC": [
+            {"Metric": m, "Value": report[k]}
+            for m, k in [("Accuracy", "accuracy"), ("ROC AUC", "roc_auc")]
+        ],
+    }
validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py

@@ -2,17 +2,17 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
 import numpy as np
-import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind …
+from validmind import tags, tasks
+from validmind.errors import SkipTestError
+from validmind.vm_models import VMDataset, VMModel
 
 
-@…
-…
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset):
     """
     Measures the intra-cluster similarity of a clustering model using cosine similarity.
 
@@ -56,59 +56,29 @@ class ClusterCosineSimilarity(Metric):
     - Lastly, although rare, perfect perpendicular vectors (cosine similarity = 0) could be within the same cluster,
     which may give an inaccurate representation of a 'bad' cluster due to low cosine similarity score.
     """
-    …
-            cluster_data = self.inputs.dataset.x[cluster_mask]
-            if cluster_data.size != 0:
-                # Compute the centroid of the cluster
-                cluster_centroid = np.mean(cluster_data, axis=0)
-                # Compute cosine similarities between the centroid and data points in the cluster
-                cosine_similarities = cosine_similarity(
-                    cluster_data, [cluster_centroid]
-                )
-                # Extract cosine similarity values for each data point in the cluster
-                cosine_similarities = cosine_similarities.flatten()
-                results.append(
-                    {
-                        "Cluster": cluster_id,
-                        "Mean Cosine Similarity": np.mean(cosine_similarities),
-                    }
-                )
-        return self.cache_results(
-            {
-                "cosine_similarity": pd.DataFrame(results).to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the cluster cosine similarity results
-        """
-        summary_regression = metric_value["cosine_similarity"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_regression,
-                    metadata=ResultTableMetadata(
-                        title="Cluster Cosine Similarity Results"
+    y_pred = dataset.y_pred(model)
+    num_clusters = len(np.unique(y_pred))
+
+    table = []
+
+    for cluster_idx in range(num_clusters):
+        cluster_data = dataset.x[y_pred == cluster_idx]
+
+        if cluster_data.size != 0:
+            cluster_centroid = np.mean(cluster_data, axis=0)
+            table.append(
+                {
+                    "Cluster": cluster_idx,
+                    "Mean Cosine Similarity": np.mean(
+                        cosine_similarity(
+                            X=cluster_data,
+                            Y=[cluster_centroid],
+                        ).flatten()
                     ),
-…
-…
-…
+                }
+            )
+
+    if not table:
+        raise SkipTestError("No clusters found")
+
+    return table
validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py

@@ -2,17 +2,74 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from …
-…
+from sklearn.metrics import (
+    adjusted_mutual_info_score,
+    adjusted_rand_score,
+    completeness_score,
+    fowlkes_mallows_score,
+    homogeneity_score,
+    v_measure_score,
+)
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+HOMOGENEITY = """
+The homogeneity score is a clustering evaluation metric that quantifies the degree to which each cluster within a
+clustering solution contains only data points that belong to a single true class or category. It provides a score
+within the range of 0 to 1, where a higher homogeneity score indicates that the clusters are more pure and internally
+consistent with respect to the ground truth labels, meaning that the data points within each cluster are closely related
+in terms of their actual class membership.
+"""
+
+COMPLETENESS = """
+The completeness score is a clustering evaluation metric used to assess how well a clustering solution captures all data points
+that belong to a single true class or category. It quantifies the extent to which the data points of a given class are
+grouped into a single cluster. The completeness score ranges from 0 to 1, with a higher score indicating that the clustering
+solution effectively accounts for all data points within their actual class, emphasizing the comprehensiveness of the
+clustering results with respect to the ground truth labels.
+"""
+
+V_MEASURE = """
+The V-Measure score is a clustering evaluation metric that combines both homogeneity and completeness to provide a
+single measure of the overall quality of a clustering solution. It takes into account how well clusters are internally
+coherent (homogeneity) and how well they capture all data points from the true classes (completeness). The V-Measure
+score ranges from 0 to 1, where a higher score indicates a better clustering result. It balances the trade-off between
+cluster purity and the extent to which all data points from true classes are captured, offering a comprehensive evaluation
+of the clustering performance.
+"""
+ADJUSTED_RAND_INDEX = """
+The Adjusted Rand Index (ARI) is a clustering evaluation metric used to measure the
+similarity between the cluster assignments in a clustering solution and the true class labels. It calculates a
+score that ranges from -1 to 1, with a higher score indicating a better clustering result. A score of 1 signifies
+perfect agreement between the clustering and the ground truth, while a score near 0 implies that the clustering
+is random with respect to the true labels, and negative values indicate disagreement. ARI accounts for chance
+clustering, making it a robust measure for assessing the quality of clustering solutions by considering both the
+extent of agreement and potential randomness in the assignments.
+"""
+
+ADJUSTED_MUTUAL_INFORMATION = """
+The Adjusted Mutual Information (AMI) is a clustering evaluation metric used to quantify the degree of
+agreement between a clustering solution and the true class labels. It provides a score that ranges from 0 to 1,
+with a higher score indicating a better clustering result. A score of 1 signifies perfect agreement,
+while a score of 0 suggests that the clustering is random with respect to the true labels. AMI takes into account the
+potential randomness in the assignments and adjusts for chance, making it a robust measure that considers both the
+extent of agreement and the potential for random clustering.
+"""
+
+FOULKES_MALLOWS_SCORE = """
+The Fowlkes-Mallows score is a clustering evaluation metric used to assess the quality of
+a clustering solution by measuring the geometric mean of two fundamental clustering metrics: precision and recall. It
+provides a score that ranges from 0 to 1, where a higher score indicates a better clustering result. A score of 1 signifies
+perfect agreement with the true class labels, while lower scores suggest less precise and recall clustering performance.
+The Fowlkes-Mallows score offers a balanced evaluation of clustering quality by considering both the ability to correctly
+identify members of the same class (precision) and the ability to capture all members of the same class (recall).
+"""
+
+
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def ClusterPerformanceMetrics(model: VMModel, dataset: VMDataset):
     """
     Evaluates the performance of clustering machine learning models using multiple established metrics.
 
@@ -58,75 +115,53 @@ class ClusterPerformanceMetrics(ClusterPerformance):
     - Does not consider aspects like computational efficiency of the model or its capability to handle high dimensional
     data.
     """
-    …
-        The Fowlkes-Mallows score offers a balanced evaluation of clustering quality by considering both the ability to correctly
-        identify members of the same class (precision) and the ability to capture all members of the same class (recall).""",
-    }
-
-    def summary(self, raw_results):
-        """
-        Returns a summarized representation of the dataset split information
-        """
-        table_records = []
-        for result in raw_results:
-            for key, _ in result.items():
-                table_records.append(
-                    {
-                        "Description": self.default_metrics_desc[key],
-                        key: result[key],
-                    }
-                )
-
-        return ResultSummary(results=[ResultTable(data=table_records)])
-
-    def metric_info(self):
-        return self.default_metrics
+    return [
+        {
+            "Metric": "Homogeneity Score",
+            "Description": HOMOGENEITY,
+            "Value": homogeneity_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Completeness Score",
+            "Description": COMPLETENESS,
+            "Value": completeness_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "V Measure",
+            "Description": V_MEASURE,
+            "Value": v_measure_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Adjusted Rand Index",
+            "Description": ADJUSTED_RAND_INDEX,
+            "Value": adjusted_rand_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Adjusted Mutual Information",
+            "Description": ADJUSTED_MUTUAL_INFORMATION,
+            "Value": adjusted_mutual_info_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Fowlkes-Mallows score",
+            "Description": FOULKES_MALLOWS_SCORE,
+            "Value": fowlkes_mallows_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+    ]
validmind/tests/model_validation/sklearn/CompletenessScore.py

@@ -2,15 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from …
+from sklearn.metrics import completeness_score
 
-from …
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
-from .ClusterPerformance import ClusterPerformance
 
-
-@…
-…
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def CompletenessScore(model: VMModel, dataset: VMDataset):
     """
     Evaluates a clustering model's capacity to categorize instances from a single class into the same cluster.
 
@@ -47,14 +47,11 @@ class CompletenessScore(ClusterPerformance):
     - The Completeness Score only applies to clustering models; it cannot be used for other types of machine learning
     models.
     """
-    …
+    return [
+        {
+            "Completeness Score": completeness_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            )
+        }
     ]
-
-    def metric_info(self):
-        return {"Completeness Score": metrics.completeness_score}