validmind 2.5.25__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.25.dist-info/METADATA +0 -118
  196. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -2,22 +2,22 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from dataclasses import dataclass
6
-
7
5
  import numpy as np
8
6
  import pandas as pd
9
7
  import plotly.graph_objects as go
10
8
  from sklearn.metrics import roc_auc_score
11
9
 
10
+ from validmind import tags, tasks
12
11
  from validmind.errors import SkipTestError
13
12
  from validmind.logging import get_logger
14
- from validmind.vm_models import Figure, Metric
13
+ from validmind.vm_models import VMDataset
15
14
 
16
15
  logger = get_logger(__name__)
17
16
 
18
17
 
19
- @dataclass
20
- class FeaturesAUC(Metric):
18
+ @tags("feature_importance", "AUC", "visualization")
19
+ @tasks("classification")
20
+ def FeaturesAUC(dataset: VMDataset, fontsize: int = 12, figure_height: int = 500):
21
21
  """
22
22
  Evaluates the discriminatory power of each individual feature within a binary classification model by calculating
23
23
  the Area Under the Curve (AUC) for each feature separately.
@@ -57,73 +57,42 @@ class FeaturesAUC(Metric):
57
57
  - This metric is applicable only to binary classification tasks and cannot be directly extended to multiclass
58
58
  classification or regression without modifications.
59
59
  """
60
-
61
- name = "features_auc"
62
- required_inputs = ["model", "dataset"]
63
- default_params = {
64
- "fontsize": 12,
65
- "figure_height": 500,
66
- }
67
- tasks = ["classification"]
68
- tags = [
69
- "feature_importance",
70
- "AUC",
71
- "visualization",
72
- ]
73
-
74
- def run(self):
75
- dataset = self.inputs.dataset
76
- x = dataset.x_df()
77
- y = dataset.y_df()
78
- n_targets = dataset.df[dataset.target_column].nunique()
79
-
80
- if n_targets != 2:
81
- raise SkipTestError("FeaturesAUC metric requires a binary target variable.")
82
-
83
- aucs = pd.DataFrame(index=x.columns, columns=["AUC"])
84
-
85
- for column in x.columns:
86
- feature_values = x[column]
87
- if feature_values.nunique() > 1:
88
- auc_score = roc_auc_score(y, feature_values)
89
- aucs.loc[column, "AUC"] = auc_score
90
- else:
91
- aucs.loc[
92
- column, "AUC"
93
- ] = np.nan # Not enough unique values to calculate AUC
94
-
95
- # Sorting the AUC scores in descending order
96
- sorted_indices = aucs["AUC"].dropna().sort_values(ascending=False).index
97
-
98
- # Plotting the results
99
- fig = go.Figure()
100
- fig.add_trace(
101
- go.Bar(
102
- y=[column for column in sorted_indices],
103
- x=[aucs.loc[column, "AUC"] for column in sorted_indices],
104
- orientation="h",
105
- )
106
- )
107
- fig.update_layout(
108
- title_text="Feature AUC Scores",
109
- yaxis=dict(
110
- tickmode="linear",
111
- dtick=1,
112
- tickfont=dict(size=self.params["fontsize"]),
113
- title="Features",
114
- autorange="reversed", # Ensure that the highest AUC is at the top
115
- ),
116
- xaxis=dict(title="AUC"),
117
- height=self.params["figure_height"],
118
- )
119
-
120
- return self.cache_results(
121
- metric_value=aucs.to_dict(),
122
- figures=[
123
- Figure(
124
- for_object=self,
125
- key="features_auc",
126
- figure=fig,
127
- ),
128
- ],
60
+ if len(np.unique(dataset.y)) != 2:
61
+ raise SkipTestError("FeaturesAUC metric requires a binary target variable.")
62
+
63
+ aucs = pd.DataFrame(index=dataset.feature_columns, columns=["AUC"])
64
+
65
+ for column in dataset.feature_columns:
66
+ feature_values = dataset.df[column]
67
+ if feature_values.nunique() > 1 and pd.api.types.is_numeric_dtype(
68
+ feature_values
69
+ ):
70
+ aucs.loc[column, "AUC"] = roc_auc_score(dataset.y, feature_values)
71
+ else:
72
+ # Not enough unique values to calculate AUC
73
+ aucs.loc[column, "AUC"] = np.nan
74
+
75
+ sorted_indices = aucs["AUC"].dropna().sort_values(ascending=False).index
76
+
77
+ fig = go.Figure()
78
+ fig.add_trace(
79
+ go.Bar(
80
+ y=[column for column in sorted_indices],
81
+ x=[aucs.loc[column, "AUC"] for column in sorted_indices],
82
+ orientation="h",
129
83
  )
84
+ )
85
+ fig.update_layout(
86
+ title_text="Feature AUC Scores",
87
+ yaxis=dict(
88
+ tickmode="linear",
89
+ dtick=1,
90
+ tickfont=dict(size=fontsize),
91
+ title="Features",
92
+ autorange="reversed", # Ensure that the highest AUC is at the top
93
+ ),
94
+ xaxis=dict(title="AUC"),
95
+ height=figure_height,
96
+ )
97
+
98
+ return fig
@@ -7,6 +7,7 @@ import pandas as pd
7
7
  import plotly.graph_objects as go
8
8
 
9
9
  from validmind import tags, tasks
10
+ from validmind.tests.utils import validate_prediction
10
11
 
11
12
 
12
13
  @tags("nlp", "text_data", "visualization")
@@ -65,6 +66,8 @@ def MeteorScore(dataset, model):
65
66
  y_true = dataset.y
66
67
  y_pred = dataset.y_pred(model)
67
68
 
69
+ validate_prediction(y_true, y_pred)
70
+
68
71
  # Load the METEOR evaluation metric
69
72
  meteor = evaluate.load("meteor")
70
73
 
@@ -7,6 +7,7 @@ import pandas as pd
7
7
  import plotly.graph_objects as go
8
8
 
9
9
  from validmind import tags, tasks
10
+ from validmind.tests.utils import validate_prediction
10
11
 
11
12
 
12
13
  @tags("nlp", "text_data", "visualization")
@@ -56,8 +57,11 @@ def RegardScore(dataset, model):
56
57
  y_true = dataset.y
57
58
  y_pred = dataset.y_pred(model)
58
59
 
60
+ # Ensure equal lengths and get truncated data if necessary
61
+ y_true, y_pred = validate_prediction(y_true, y_pred)
62
+
59
63
  # Load the regard evaluation metric
60
- regard_tool = evaluate.load("regard")
64
+ regard_tool = evaluate.load("regard", module_type="measurement")
61
65
 
62
66
  # Function to calculate regard scores
63
67
  def compute_regard_scores(texts):
@@ -2,17 +2,17 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from dataclasses import dataclass
6
-
7
5
  import numpy as np
8
6
  import plotly.figure_factory as ff
9
7
  import plotly.graph_objects as go
10
8
 
11
- from validmind.vm_models import Figure, Metric
9
+ from validmind import tags, tasks
10
+ from validmind.vm_models import VMDataset, VMModel
12
11
 
13
12
 
14
- @dataclass
15
- class RegressionResidualsPlot(Metric):
13
+ @tags("model_performance", "visualization")
14
+ @tasks("regression")
15
+ def RegressionResidualsPlot(model: VMModel, dataset: VMDataset, bin_size: float = 0.1):
16
16
  """
17
17
  Evaluates regression model performance using residual distribution and actual vs. predicted plots.
18
18
 
@@ -54,75 +54,54 @@ class RegressionResidualsPlot(Metric):
54
54
  - Does not summarize model performance into a single quantifiable metric, which might be needed for comparative or
55
55
  summary analyses.
56
56
  """
57
-
58
- name = "regression_residuals_plot"
59
- required_inputs = ["model", "dataset"]
60
- tasks = ["regression"]
61
- tags = ["model_performance"]
62
- default_params = {"bin_size": 0.1}
63
-
64
- def run(self):
65
- y_true = self.inputs.dataset.y
66
- y_pred = self.inputs.dataset.y_pred(self.inputs.model)
67
- # Calculate residuals
68
- residuals = y_true.flatten() - y_pred.flatten()
69
- # Create residuals plot
70
- hist_data = [residuals]
71
- group_labels = ["Residuals"] # Names of the dataset
72
- bin_size = self.params["bin_size"]
73
- fig = ff.create_distplot(
74
- hist_data, group_labels, bin_size=[bin_size], show_hist=True, show_rug=False
75
- )
76
- fig.update_layout(
77
- title="Distribution of Residuals",
78
- xaxis_title="Residuals",
79
- yaxis_title="Density",
80
- )
81
- figures = [
82
- Figure(
83
- for_object=self,
84
- key=self.key,
85
- figure=fig,
86
- )
87
- ]
88
- # Create a scatter plot of actual vs predicted values
89
- scatter = go.Scatter(
90
- x=y_true.flatten(),
91
- y=y_pred.flatten(),
92
- mode="markers",
93
- name="True vs Predicted",
94
- marker=dict(color="blue", opacity=0.5),
95
- )
96
-
97
- # Line of perfect prediction
98
- max_val = np.nanmax([np.nanmax(y_true), np.nanmax(y_pred)])
99
- min_val = np.nanmin([np.nanmin(y_true), np.nanmin(y_pred)])
100
- line = go.Scatter(
101
- x=[min_val, max_val],
102
- y=[min_val, max_val],
103
- mode="lines",
104
- name="Perfect Fit",
105
- line=dict(color="red", dash="dash"),
57
+ y_true = dataset.y
58
+ y_pred = dataset.y_pred(model)
59
+
60
+ figures = []
61
+
62
+ # Residuals plot
63
+ fig = ff.create_distplot(
64
+ hist_data=[y_true.flatten() - y_pred.flatten()],
65
+ group_labels=["Residuals"],
66
+ bin_size=[bin_size],
67
+ show_hist=True,
68
+ show_rug=False,
69
+ )
70
+ fig.update_layout(
71
+ title="Distribution of Residuals",
72
+ xaxis_title="Residuals",
73
+ yaxis_title="Density",
74
+ )
75
+ figures.append(fig)
76
+
77
+ # True vs Predicted w/ perfect fit line plot
78
+ max_val = np.nanmax([np.nanmax(y_true), np.nanmax(y_pred)])
79
+ min_val = np.nanmin([np.nanmin(y_true), np.nanmin(y_pred)])
80
+ figures.append(
81
+ go.Figure(
82
+ data=[
83
+ go.Scatter(
84
+ x=y_true.flatten(),
85
+ y=y_pred.flatten(),
86
+ mode="markers",
87
+ name="True vs Predicted",
88
+ marker=dict(color="blue", opacity=0.5),
89
+ ),
90
+ go.Scatter(
91
+ x=[min_val, max_val],
92
+ y=[min_val, max_val],
93
+ mode="lines",
94
+ name="Perfect Fit",
95
+ line=dict(color="red", dash="dash"),
96
+ ),
97
+ ],
98
+ layout=go.Layout(
99
+ title="True vs. Predicted Values",
100
+ xaxis_title="True Values",
101
+ yaxis_title="Predicted Values",
102
+ showlegend=True,
103
+ ),
106
104
  )
105
+ )
107
106
 
108
- # Layout settings
109
- layout = go.Layout(
110
- title="True vs. Predicted Values",
111
- xaxis_title="True Values",
112
- yaxis_title="Predicted Values",
113
- showlegend=True,
114
- )
115
-
116
- fig = go.Figure(data=[scatter, line], layout=layout)
117
-
118
- figures.append(
119
- Figure(
120
- for_object=self,
121
- key=self.key,
122
- figure=fig,
123
- )
124
- )
125
-
126
- return self.cache_results(
127
- figures=figures,
128
- )
107
+ return tuple(figures)
@@ -5,10 +5,13 @@
5
5
  import plotly.express as px
6
6
  from sklearn.cluster import KMeans
7
7
 
8
- from validmind.vm_models import Figure, Metric
8
+ from validmind import tags, tasks
9
+ from validmind.vm_models import VMDataset, VMModel
9
10
 
10
11
 
11
- class ClusterDistribution(Metric):
12
+ @tags("llm", "text_data", "embeddings", "visualization")
13
+ @tasks("feature_extraction")
14
+ def ClusterDistribution(model: VMModel, dataset: VMDataset, num_clusters: int = 5):
12
15
  """
13
16
  Assesses the distribution of text embeddings across clusters produced by a model using KMeans clustering.
14
17
 
@@ -49,34 +52,8 @@ class ClusterDistribution(Metric):
49
52
  - Uses the KMeans clustering algorithm, which assumes that clusters are convex and isotropic, and may not work as
50
53
  intended if the true clusters in the data are not of this shape.
51
54
  """
52
-
53
- name = "Text Embeddings Cluster Distribution"
54
- required_inputs = ["model", "dataset"]
55
- default_params = {
56
- "num_clusters": 5,
57
- }
58
- tasks = ["feature_extraction"]
59
- tags = ["llm", "text_data", "embeddings", "visualization"]
60
-
61
- def run(self):
62
- # run kmeans clustering on embeddings
63
- kmeans = KMeans(n_clusters=self.params["num_clusters"]).fit(
64
- self.inputs.dataset.y_pred(self.inputs.model)
65
- )
66
-
67
- # plot the distribution
68
- fig = px.histogram(
69
- kmeans.labels_,
70
- nbins=self.params["num_clusters"],
71
- title="Embeddings Cluster Distribution",
72
- )
73
-
74
- return self.cache_results(
75
- figures=[
76
- Figure(
77
- for_object=self,
78
- key=self.key,
79
- figure=fig,
80
- )
81
- ],
82
- )
55
+ return px.histogram(
56
+ KMeans(n_clusters=num_clusters).fit(dataset.y_pred(model)).labels_,
57
+ nbins=num_clusters,
58
+ title="Embeddings Cluster Distribution",
59
+ )
@@ -5,10 +5,13 @@
5
5
  import plotly.express as px
6
6
  from sklearn.metrics.pairwise import cosine_similarity
7
7
 
8
- from validmind.vm_models import Figure, Metric
8
+ from validmind import tags, tasks
9
+ from validmind.vm_models import VMDataset, VMModel
9
10
 
10
11
 
11
- class CosineSimilarityDistribution(Metric):
12
+ @tags("llm", "text_data", "embeddings", "visualization")
13
+ @tasks("feature_extraction")
14
+ def CosineSimilarityDistribution(dataset: VMDataset, model: VMModel):
12
15
  """
13
16
  Assesses the similarity between predicted text embeddings from a model using a Cosine Similarity distribution
14
17
  histogram.
@@ -49,30 +52,9 @@ class CosineSimilarityDistribution(Metric):
49
52
  - The output is sensitive to the choice of bin number for the histogram. Different bin numbers could give a
50
53
  slightly altered perspective on the distribution of cosine similarity.
51
54
  """
52
-
53
- name = "Text Embeddings Cosine Similarity Distribution"
54
- required_inputs = ["model", "dataset"]
55
- tasks = ["feature_extraction"]
56
- tags = ["llm", "text_data", "embeddings", "visualization"]
57
-
58
- def run(self):
59
- # Compute cosine similarity
60
- similarities = cosine_similarity(self.inputs.dataset.y_pred(self.inputs.model))
61
-
62
- # plot the distribution
63
- fig = px.histogram(
64
- x=similarities.flatten(),
65
- nbins=100,
66
- title="Cosine Similarity Distribution",
67
- labels={"x": "Cosine Similarity"},
68
- )
69
-
70
- return self.cache_results(
71
- figures=[
72
- Figure(
73
- for_object=self,
74
- key=self.key,
75
- figure=fig,
76
- )
77
- ],
78
- )
55
+ return px.histogram(
56
+ x=cosine_similarity(dataset.y_pred(model)).flatten(),
57
+ nbins=100,
58
+ title="Cosine Similarity Distribution",
59
+ labels={"x": "Cosine Similarity"},
60
+ )
@@ -5,10 +5,13 @@
5
5
  import numpy as np
6
6
  import plotly.express as px
7
7
 
8
- from validmind.vm_models import Figure, Metric
8
+ from validmind import tags, tasks
9
+ from validmind.vm_models import VMDataset, VMModel
9
10
 
10
11
 
11
- class DescriptiveAnalytics(Metric):
12
+ @tags("llm", "text_data", "embeddings", "visualization")
13
+ @tasks("feature_extraction")
14
+ def DescriptiveAnalytics(dataset: VMDataset, model: VMModel):
12
15
  """
13
16
  Evaluates statistical properties of text embeddings in an ML model via mean, median, and standard deviation
14
17
  histograms.
@@ -52,32 +55,17 @@ class DescriptiveAnalytics(Metric):
52
55
  - While it displays valuable information about the central tendency and spread of data, it does not provide
53
56
  information about correlations between different embedding dimensions.
54
57
  """
55
-
56
- name = "Descriptive Analytics for Text Embeddings Models"
57
- required_inputs = ["model", "dataset"]
58
- tasks = ["feature_extraction"]
59
- tags = ["llm", "text_data", "embeddings", "visualization"]
60
-
61
- def run(self):
62
- # Assuming y_pred returns a 2D array of embeddings [samples, features]
63
- preds = self.inputs.dataset.y_pred(self.inputs.model)
64
-
65
- # Calculate statistics across the embedding dimensions, not across all embeddings
66
- means = np.mean(preds, axis=0) # Mean of each feature across all samples
67
- medians = np.median(preds, axis=0) # Median of each feature across all samples
68
- stds = np.std(preds, axis=0) # Std. dev. of each feature across all samples
69
-
70
- # Plot histograms of the calculated statistics
71
- mean_fig = px.histogram(x=means, title="Distribution of Embedding Means")
72
- median_fig = px.histogram(x=medians, title="Distribution of Embedding Medians")
73
- std_fig = px.histogram(
74
- x=stds, title="Distribution of Embedding Standard Deviations"
75
- )
76
-
77
- return self.cache_results(
78
- figures=[
79
- Figure(for_object=self, key=f"{self.key}_mean", figure=mean_fig),
80
- Figure(for_object=self, key=f"{self.key}_median", figure=median_fig),
81
- Figure(for_object=self, key=f"{self.key}_std", figure=std_fig),
82
- ],
83
- )
58
+ return (
59
+ px.histogram(
60
+ x=np.mean(dataset.y_pred(model), axis=0),
61
+ title="Distribution of Embedding Means",
62
+ ),
63
+ px.histogram(
64
+ x=np.median(dataset.y_pred(model), axis=0),
65
+ title="Distribution of Embedding Medians",
66
+ ),
67
+ px.histogram(
68
+ x=np.std(dataset.y_pred(model), axis=0),
69
+ title="Distribution of Embedding Standard Deviations",
70
+ ),
71
+ )
@@ -2,13 +2,26 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Union
6
+
5
7
  import plotly.express as px
6
8
  from sklearn.manifold import TSNE
7
9
 
8
- from validmind.vm_models import Figure, Metric
10
+ from validmind import tags, tasks
11
+ from validmind.logging import get_logger
12
+ from validmind.vm_models import VMDataset, VMModel
13
+
14
+ logger = get_logger(__name__)
9
15
 
10
16
 
11
- class EmbeddingsVisualization2D(Metric):
17
+ @tags("llm", "text_data", "embeddings", "visualization")
18
+ @tasks("feature_extraction")
19
+ def EmbeddingsVisualization2D(
20
+ model: VMModel,
21
+ dataset: VMDataset,
22
+ cluster_column: Union[str, None] = None,
23
+ perplexity: int = 30,
24
+ ):
12
25
  """
13
26
  Visualizes 2D representation of text embeddings generated by a model using t-SNE technique.
14
27
 
@@ -50,52 +63,30 @@ class EmbeddingsVisualization2D(Metric):
50
63
  - It is designed for visual exploration and not for downstream tasks; that is, the 2D embeddings generated should
51
64
  not be directly used for further training or analysis.
52
65
  """
66
+ y_pred = dataset.y_pred(model)
67
+
68
+ num_samples = len(y_pred)
69
+ perplexity = perplexity if perplexity < num_samples else num_samples - 1
70
+
71
+ reduced_embeddings = TSNE(
72
+ n_components=2,
73
+ perplexity=perplexity,
74
+ ).fit_transform(y_pred)
53
75
 
54
- name = "2D Visualization of Text Embeddings"
55
- required_inputs = ["model", "dataset"]
56
- default_params = {
57
- "cluster_column": None,
58
- "perplexity": 30,
76
+ if not cluster_column and len(dataset.feature_columns_categorical) == 1:
77
+ cluster_column = dataset.feature_columns_categorical[0]
78
+ else:
79
+ logger.warning("Cannot color code embeddings without a 'cluster_column' param.")
80
+
81
+ scatter_kwargs = {
82
+ "x": reduced_embeddings[:, 0],
83
+ "y": reduced_embeddings[:, 1],
84
+ "title": "2D Visualization of Text Embeddings",
59
85
  }
60
- tasks = ["feature_extraction"]
61
- tags = ["llm", "text_data", "embeddings", "visualization"]
62
-
63
- def run(self):
64
- cluster_column = self.params.get("cluster_column")
65
-
66
- if cluster_column is None:
67
- raise ValueError(
68
- "The `cluster_column` parameter must be provided to the EmbeddingsVisualization2D test."
69
- )
70
-
71
- # use TSNE to reduce dimensionality of embeddings
72
- num_samples = len(self.inputs.dataset.y_pred(self.inputs.model))
73
-
74
- if self.params["perplexity"] >= num_samples:
75
- perplexity = num_samples - 1
76
- else:
77
- perplexity = self.params["perplexity"]
78
-
79
- reduced_embeddings = TSNE(
80
- n_components=2,
81
- perplexity=perplexity,
82
- ).fit_transform(self.inputs.model.y_test_predict.values)
83
-
84
- # create a scatter plot from the reduced embeddings
85
- fig = px.scatter(
86
- x=reduced_embeddings[:, 0],
87
- y=reduced_embeddings[:, 1],
88
- color=self.inputs.dataset.df[cluster_column],
89
- title="2D Visualization of Text Embeddings",
90
- )
91
- fig.update_layout(width=500, height=500)
92
-
93
- return self.cache_results(
94
- figures=[
95
- Figure(
96
- for_object=self,
97
- key=self.key,
98
- figure=fig,
99
- )
100
- ],
101
- )
86
+ if cluster_column:
87
+ scatter_kwargs["color"] = dataset.df[cluster_column]
88
+
89
+ fig = px.scatter(**scatter_kwargs)
90
+ fig.update_layout(width=500, height=500)
91
+
92
+ return fig