validmind 2.2.5__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. validmind/__version__.py +1 -1
  2. validmind/{ai.py → ai/test_descriptions.py} +127 -69
  3. validmind/ai/utils.py +104 -0
  4. validmind/api_client.py +70 -31
  5. validmind/client.py +5 -5
  6. validmind/logging.py +38 -32
  7. validmind/models/foundation.py +10 -6
  8. validmind/models/function.py +3 -1
  9. validmind/models/metadata.py +1 -1
  10. validmind/test_suites/__init__.py +1 -7
  11. validmind/test_suites/regression.py +0 -16
  12. validmind/test_suites/statsmodels_timeseries.py +1 -1
  13. validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  14. validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
  15. validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  16. validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
  17. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  18. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  19. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  20. validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
  21. validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
  22. validmind/tests/data_validation/ScatterPlot.py +1 -1
  23. validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  24. validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  25. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  26. validmind/tests/data_validation/WOEBinTable.py +1 -1
  27. validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
  28. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  29. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  30. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  31. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  32. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  33. validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  34. validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  35. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  36. validmind/tests/decorator.py +1 -1
  37. validmind/tests/model_validation/FeaturesAUC.py +5 -3
  38. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  39. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  40. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  41. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  42. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  43. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  44. validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  45. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  46. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  47. validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  48. validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  49. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  50. validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  51. validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  52. validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  53. validmind/tests/model_validation/ragas/utils.py +35 -9
  54. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  55. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  56. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  57. validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  58. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  59. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  60. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  61. validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  62. validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  63. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  64. validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  65. validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  66. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  67. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  68. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  69. validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  70. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  71. validmind/tests/prompt_validation/Bias.py +14 -11
  72. validmind/tests/prompt_validation/Clarity.py +14 -11
  73. validmind/tests/prompt_validation/Conciseness.py +14 -11
  74. validmind/tests/prompt_validation/Delimitation.py +14 -11
  75. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  76. validmind/tests/prompt_validation/Robustness.py +11 -11
  77. validmind/tests/prompt_validation/Specificity.py +14 -11
  78. validmind/tests/prompt_validation/ai_powered_test.py +53 -75
  79. validmind/unit_metrics/composite.py +2 -1
  80. validmind/utils.py +4 -49
  81. validmind/vm_models/dataset/dataset.py +17 -3
  82. validmind/vm_models/dataset/utils.py +2 -2
  83. validmind/vm_models/model.py +1 -1
  84. validmind/vm_models/test/metric.py +1 -8
  85. validmind/vm_models/test/result_wrapper.py +27 -34
  86. validmind/vm_models/test/test.py +3 -0
  87. validmind/vm_models/test/threshold_test.py +1 -1
  88. validmind/vm_models/test_suite/runner.py +12 -6
  89. validmind/vm_models/test_suite/summary.py +18 -7
  90. validmind/vm_models/test_suite/test.py +13 -20
  91. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
  92. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/RECORD +95 -104
  93. validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  94. validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  95. validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  96. validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  97. validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  98. validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  99. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  100. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  101. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  102. validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  103. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
  104. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
  105. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0
validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py

@@ -4,9 +4,13 @@
 
 from dataclasses import dataclass
 
+import pandas as pd
 from statsmodels.tsa.stattools import kpss
 
-from validmind.vm_models import Metric
+from validmind.logging import get_logger
+from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
+
+logger = get_logger(__name__)
 
 
 @dataclass
@@ -64,14 +68,63 @@ class KPSS(Metric):
         """
         dataset = self.inputs.dataset.df
 
-        kpss_values = {}
+        # Check if the dataset is a time series
+        if not isinstance(dataset.index, (pd.DatetimeIndex, pd.PeriodIndex)):
+            raise ValueError(
+                "Dataset index must be a datetime or period index for time series analysis."
+            )
+
+        # Preprocessing: Drop rows with any NaN values
+        if dataset.isnull().values.any():
+            logger.warning(
+                "Dataset contains missing values. Rows with NaNs will be dropped."
+            )
+            dataset = dataset.dropna()
+
+        # Convert to numeric and handle non-numeric data
+        dataset = dataset.apply(pd.to_numeric, errors="coerce")
+
+        # Initialize a list to store KPSS results
+        kpss_values = []
+
         for col in dataset.columns:
-            kpss_stat, pvalue, usedlag, critical_values = kpss(dataset[col].values)
-            kpss_values[col] = {
-                "stat": kpss_stat,
-                "pvalue": pvalue,
-                "usedlag": usedlag,
-                "critical_values": critical_values,
-            }
-
-        return self.cache_results(kpss_values)
+            try:
+                kpss_stat, pvalue, usedlag, critical_values = kpss(dataset[col].values)
+                kpss_values.append(
+                    {
+                        "Variable": col,
+                        "stat": kpss_stat,
+                        "pvalue": pvalue,
+                        "usedlag": usedlag,
+                        "critical_values": critical_values,
+                    }
+                )
+            except Exception as e:
+                logger.error(f"Error processing column '{col}': {e}")
+                kpss_values.append(
+                    {
+                        "Variable": col,
+                        "stat": None,
+                        "pvalue": None,
+                        "usedlag": None,
+                        "critical_values": None,
+                        "error": str(e),
+                    }
+                )
+
+        return self.cache_results({"kpss_results": kpss_values})
+
+    def summary(self, metric_value):
+        """
+        Build a table for summarizing the KPSS results
+        """
+        kpss_results = metric_value["kpss_results"]
+
+        return ResultSummary(
+            results=[
+                ResultTable(
+                    data=kpss_results,
+                    metadata=ResultTableMetadata(title="KPSS Test Results"),
+                )
+            ]
+        )
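The KPSS refactor swaps the per-column dict for a list of row dicts (so the new summary() can render a table) and guards each statistic call. A minimal standalone sketch of that pattern outside the ValidMind Metric class, with made-up sample data:

    import numpy as np
    import pandas as pd
    from statsmodels.tsa.stattools import kpss

    # Hypothetical frame with the datetime index the refactored test now requires
    df = pd.DataFrame(
        {"x": np.random.randn(100).cumsum(), "y": np.random.randn(100)},
        index=pd.date_range("2020-01-01", periods=100, freq="D"),
    )

    rows = []
    for col in df.columns:
        try:
            stat, pvalue, usedlag, _ = kpss(df[col].values)
            rows.append(
                {"Variable": col, "stat": stat, "pvalue": pvalue, "usedlag": usedlag}
            )
        except Exception as e:
            # As in the diff: record the failure as a row instead of aborting the test
            rows.append({"Variable": col, "stat": None, "pvalue": None, "error": str(e)})

    print(pd.DataFrame(rows))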
validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py

@@ -4,9 +4,14 @@
 
 from dataclasses import dataclass
 
+import pandas as pd
 from arch.unitroot import PhillipsPerron
+from numpy.linalg import LinAlgError
 
-from validmind.vm_models import Metric
+from validmind.logging import get_logger
+from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
+
+logger = get_logger(__name__)
 
 
 @dataclass
@@ -62,14 +67,63 @@ class PhillipsPerronArch(Metric):
         """
        dataset = self.inputs.dataset.df
 
-        pp_values = {}
+        # Check if the dataset is a time series
+        if not isinstance(dataset.index, (pd.DatetimeIndex, pd.PeriodIndex)):
+            raise ValueError(
+                "Dataset index must be a datetime or period index for time series analysis."
+            )
+
+        # Preprocessing: Drop rows with any NaN values
+        if dataset.isnull().values.any():
+            logger.warning(
+                "Dataset contains missing values. Rows with NaNs will be dropped."
+            )
+            dataset = dataset.dropna()
+
+        # Convert to numeric and handle non-numeric data
+        dataset = dataset.apply(pd.to_numeric, errors="coerce")
+
+        # Initialize a list to store Phillips-Perron results
+        pp_values = []
+
         for col in dataset.columns:
-            pp = PhillipsPerron(dataset[col].values)
-            pp_values[col] = {
-                "stat": pp.stat,
-                "pvalue": pp.pvalue,
-                "usedlag": pp.lags,
-                "nobs": pp.nobs,
-            }
-
-        return self.cache_results(pp_values)
+            try:
+                pp = PhillipsPerron(dataset[col].values)
+                pp_values.append(
+                    {
+                        "Variable": col,
+                        "stat": pp.stat,
+                        "pvalue": pp.pvalue,
+                        "usedlag": pp.lags,
+                        "nobs": pp.nobs,
+                    }
+                )
+            except LinAlgError as e:
+                logger.error(f"Error processing column '{col}': {e}")
+                pp_values.append(
+                    {
+                        "Variable": col,
+                        "stat": None,
+                        "pvalue": None,
+                        "usedlag": None,
+                        "nobs": None,
+                        "error": str(e),
+                    }
+                )
+
+        return self.cache_results({"phillips_perron_results": pp_values})
+
+    def summary(self, metric_value):
+        """
+        Build a table for summarizing the Phillips-Perron results
+        """
+        pp_results = metric_value["phillips_perron_results"]
+
+        return ResultSummary(
+            results=[
+                ResultTable(
+                    data=pp_results,
+                    metadata=ResultTableMetadata(title="Phillips-Perron Test Results"),
+                )
+            ]
+        )
validmind/tests/data_validation/ScatterPlot.py

@@ -51,7 +51,7 @@ class ScatterPlot(Metric):
     """
 
     name = "scatter_plot"
-    required_inputs = ["dataset", "dataset.target_column"]
+    required_inputs = ["dataset"]
     metadata = {
         "task_types": ["classification", "regression"],
         "tags": ["tabular_data", "visualization"],
validmind/tests/data_validation/SeasonalDecompose.py

@@ -90,14 +90,18 @@ class SeasonalDecompose(Metric):
         dfs = [
             pd.DataFrame(series)
             .pipe(
-                lambda x: x.reset_index()
-                if not isinstance(x.index, pd.DatetimeIndex)
-                else x.reset_index().rename(columns={x.index.name: "Date"})
+                lambda x: (
+                    x.reset_index()
+                    if not isinstance(x.index, pd.DatetimeIndex)
+                    else x.reset_index().rename(columns={x.index.name: "Date"})
+                )
             )
             .assign(
-                Date=lambda x: x["Date"].astype(str)
-                if "Date" in x.columns
-                else x.index.astype(str)
+                Date=lambda x: (
+                    x["Date"].astype(str)
+                    if "Date" in x.columns
+                    else x.index.astype(str)
+                )
             )
             for series in results.values()
         ]
@@ -200,7 +204,8 @@ class SeasonalDecompose(Metric):
                 )
             else:
                 warnings.warn(
-                    f"No frequency could be inferred for variable '{col}'. Skipping seasonal decomposition and plots for this variable."
+                    f"No frequency could be inferred for variable '{col}'. "
+                    "Skipping seasonal decomposition and plots for this variable."
                 )
 
         return self.cache_results(results, figures=figures)
validmind/tests/data_validation/TabularDateTimeHistograms.py

@@ -2,8 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-import matplotlib.pyplot as plt
-import seaborn as sns
+import pandas as pd
+import plotly.graph_objects as go
 
 from validmind.vm_models import Figure, Metric
 
@@ -50,45 +50,41 @@ class TabularDateTimeHistograms(Metric):
 
     metadata = {
         "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "visualization"],
+        "tags": ["time_series_data", "visualization"],
     }
 
     def run(self):
         df = self.inputs.dataset.df
 
-        # Extract datetime columns from the dataset
-        datetime_columns = df.select_dtypes(include=["datetime64"]).columns.tolist()
-
-        if len(datetime_columns) == 0:
-            raise ValueError("No datetime columns found in the dataset")
+        # Check if the index is a datetime type
+        if not isinstance(df.index, (pd.DatetimeIndex, pd.PeriodIndex)):
+            raise ValueError("Index must be a datetime type")
 
         figures = []
-        for col in datetime_columns:
-            plt.figure()
-            fig, _ = plt.subplots()
-
-            # Calculate the difference between consecutive dates and convert to days
-            date_diffs = df[col].sort_values().diff().dt.days.dropna()
-
-            # Filter out 0 values
-            date_diffs = date_diffs[date_diffs != 0]
-
-            ax = sns.histplot(date_diffs, kde=False, bins=30)
-            plt.title(f"{col}", weight="bold", fontsize=20)
-
-            plt.xticks(fontsize=18)
-            plt.yticks(fontsize=18)
-            ax.set_xlabel("Days Between Consecutive Dates", fontsize=18)
-            ax.set_ylabel("Frequency", fontsize=18)
-            figures.append(
-                Figure(
-                    for_object=self,
-                    key=f"{self.key}:{col}",
-                    figure=fig,
-                )
-            )
 
-        plt.close("all")
+        # Calculate the difference between consecutive dates in the index
+        date_diffs = df.index.to_series().sort_values().diff().dt.days.dropna()
+
+        # Filter out 0 values
+        date_diffs = date_diffs[date_diffs != 0]
+
+        # Create a histogram using Plotly
+        fig = go.Figure()
+        fig.add_trace(go.Histogram(x=date_diffs, nbinsx=30))
+        fig.update_layout(
+            title="Index",
+            xaxis_title="Days Between Consecutive Dates",
+            yaxis_title="Frequency",
+            font=dict(size=18),
+        )
+
+        figures.append(
+            Figure(
+                for_object=self,
+                key=f"{self.key}:index",
+                figure=fig,
+            )
+        )
 
         return self.cache_results(
             figures=figures,
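The rewrite drops the per-column matplotlib/seaborn loop: day gaps are now computed once from the datetime index and drawn with a single Plotly histogram. The same plot sketched standalone, with an illustrative index:

    import pandas as pd
    import plotly.graph_objects as go

    # Hypothetical irregular datetime index
    idx = pd.to_datetime(
        ["2024-01-01", "2024-01-02", "2024-01-05", "2024-01-12", "2024-01-13"]
    )
    df = pd.DataFrame({"value": range(len(idx))}, index=idx)

    # Day gaps between consecutive index entries; zero gaps dropped as in the diff
    date_diffs = df.index.to_series().sort_values().diff().dt.days.dropna()
    date_diffs = date_diffs[date_diffs != 0]

    fig = go.Figure(go.Histogram(x=date_diffs, nbinsx=30))
    fig.update_layout(
        xaxis_title="Days Between Consecutive Dates", yaxis_title="Frequency"
    )
    fig.show()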
validmind/tests/data_validation/WOEBinPlots.py

@@ -58,7 +58,7 @@ class WOEBinPlots(Metric):
     """
 
     name = "woe_bin_plots"
-    required_context = ["dataset"]
+    required_inputs = ["dataset"]
     default_params = {"breaks_adj": None, "fig_height": 600, "fig_width": 500}
     metadata = {
         "task_types": ["classification"],
validmind/tests/data_validation/WOEBinTable.py

@@ -46,7 +46,7 @@ class WOEBinTable(Metric):
     """
 
     name = "woe_bin_table"
-    required_context = ["dataset"]
+    required_inputs = ["dataset"]
     default_params = {"breaks_adj": None}
     metadata = {
         "task_types": ["classification"],
validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py

@@ -4,9 +4,14 @@
 
 from dataclasses import dataclass
 
+import pandas as pd
 from arch.unitroot import ZivotAndrews
+from numpy.linalg import LinAlgError
 
-from validmind.vm_models import Metric
+from validmind.logging import get_logger
+from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
+
+logger = get_logger(__name__)
 
 
 @dataclass
@@ -57,14 +62,63 @@ class ZivotAndrewsArch(Metric):
         """
         dataset = self.inputs.dataset.df
 
-        za_values = {}
+        # Check if the dataset is a time series
+        if not isinstance(dataset.index, (pd.DatetimeIndex, pd.PeriodIndex)):
+            raise ValueError(
+                "Dataset index must be a datetime or period index for time series analysis."
+            )
+
+        # Preprocessing: Drop rows with any NaN values
+        if dataset.isnull().values.any():
+            logger.warning(
+                "Dataset contains missing values. Rows with NaNs will be dropped."
+            )
+            dataset = dataset.dropna()
+
+        # Convert to numeric and handle non-numeric data
+        dataset = dataset.apply(pd.to_numeric, errors="coerce")
+
+        # Initialize a list to store Zivot-Andrews results
+        za_values = []
+
         for col in dataset.columns:
-            za = ZivotAndrews(dataset[col].values)
-            za_values[col] = {
-                "stat": za.stat,
-                "pvalue": za.pvalue,
-                "usedlag": za.lags,
-                "nobs": za.nobs,
-            }
-
-        return self.cache_results(za_values)
+            try:
+                za = ZivotAndrews(dataset[col].values)
+                za_values.append(
+                    {
+                        "Variable": col,
+                        "stat": za.stat,
+                        "pvalue": za.pvalue,
+                        "usedlag": za.lags,
+                        "nobs": za.nobs,
+                    }
+                )
+            except (LinAlgError, ValueError) as e:
+                logger.error(f"Error while processing column '{col}'. Details: {e}")
+                za_values.append(
+                    {
+                        "Variable": col,
+                        "stat": None,
+                        "pvalue": None,
+                        "usedlag": None,
+                        "nobs": None,
+                        "error": str(e),
+                    }
+                )
+
+        return self.cache_results({"zivot_andrews_results": za_values})
+
+    def summary(self, metric_value):
+        """
+        Build a table for summarizing the Zivot-Andrews results
+        """
+        za_results = metric_value["zivot_andrews_results"]
+
+        return ResultSummary(
+            results=[
+                ResultTable(
+                    data=za_results,
+                    metadata=ResultTableMetadata(title="Zivot-Andrews Test Results"),
+                )
+            ]
+        )
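PhillipsPerron and ZivotAndrews get the same treatment as KPSS; both come from the arch package, whose result objects expose stat, pvalue, lags, and nobs (all used in the diff above). A sketch of the shared guarded-loop pattern on synthetic data:

    import numpy as np
    import pandas as pd
    from arch.unitroot import PhillipsPerron, ZivotAndrews
    from numpy.linalg import LinAlgError

    df = pd.DataFrame(
        {"level": np.random.randn(200).cumsum()},
        index=pd.date_range("2020-01-01", periods=200, freq="D"),
    )

    results = []
    for col in df.columns:
        for test_cls in (PhillipsPerron, ZivotAndrews):
            try:
                res = test_cls(df[col].values)
                results.append(
                    {"Variable": col, "Test": test_cls.__name__, "stat": res.stat,
                     "pvalue": res.pvalue, "usedlag": res.lags, "nobs": res.nobs}
                )
            except (LinAlgError, ValueError) as e:
                # Singular matrices or too-short series become rows, not crashes
                results.append({"Variable": col, "Test": test_cls.__name__, "error": str(e)})

    print(pd.DataFrame(results))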
validmind/tests/data_validation/nlp/CommonWords.py

@@ -52,7 +52,7 @@ class CommonWords(Metric):
     """
 
     name = "common_words"
-    required_inputs = ["dataset", "dataset.text_column"]
+    required_inputs = ["dataset"]
     metadata = {
         "task_types": ["text_classification", "text_summarization"],
         "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
validmind/tests/data_validation/nlp/Hashtags.py

@@ -54,7 +54,7 @@ class Hashtags(ThresholdTest):
     """
 
     name = "hashtags"
-    required_inputs = ["dataset", "dataset.text_column"]
+    required_inputs = ["dataset"]
     default_params = {"top_hashtags": 25}
     metadata = {
         "task_types": ["text_classification", "text_summarization"],
validmind/tests/data_validation/nlp/Mentions.py

@@ -54,7 +54,7 @@ class Mentions(ThresholdTest):
 
     name = "mentions"
 
-    required_inputs = ["dataset", "dataset.text_column"]
+    required_inputs = ["dataset"]
     default_params = {"top_mentions": 25}
     metadata = {
         "task_types": ["text_classification", "text_summarization"],
validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py

@@ -10,7 +10,7 @@ from textblob import TextBlob
 from validmind import tags, tasks
 
 
-@tags("data_validation")
+@tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
 def PolarityAndSubjectivity(dataset):
     """
@@ -27,6 +27,7 @@ def PolarityAndSubjectivity(dataset):
     Returns:
         plotly.graph_objs._figure.Figure: A Plotly scatter plot of polarity vs subjectivity.
     """
+
     # Function to calculate sentiment and subjectivity
     def analyze_sentiment(text):
         analysis = TextBlob(text)
validmind/tests/data_validation/nlp/Punctuations.py

@@ -51,7 +51,7 @@ class Punctuations(Metric):
     """
 
     name = "punctuations"
-    required_inputs = ["dataset", "dataset.text_column"]
+    required_inputs = ["dataset"]
     metadata = {
         "task_types": ["text_classification", "text_summarization"],
         "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
validmind/tests/data_validation/nlp/Sentiment.py

@@ -11,7 +11,7 @@ from nltk.sentiment import SentimentIntensityAnalyzer
 from validmind import tags, tasks
 
 
-@tags("data_validation")
+@tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
 def Sentiment(dataset):
     """
validmind/tests/data_validation/nlp/TextDescription.py

@@ -60,7 +60,7 @@ class TextDescription(Metric):
     """
 
     name = "text_description"
-    required_inputs = ["dataset", "dataset.text_column"]
+    required_inputs = ["dataset"]
     default_params = {
         "unwanted_tokens": {
             "s",
@@ -79,6 +79,10 @@ class TextDescription(Metric):
         "num_top_words": 3,
         "lang": "english",
     }
+    metadata = {
+        "task_types": ["text_classification", "text_summarization"],
+        "tags": ["nlp", "text_data", "visualization"],
+    }
 
     def general_text_metrics(self, df, text_column):
         nltk.download("punkt", quiet=True)
validmind/tests/data_validation/nlp/Toxicity.py

@@ -9,7 +9,7 @@ import seaborn as sns
 from validmind import tags, tasks
 
 
-@tags("data_validation")
+@tags("nlp", "text_data", "data_validation")
 @tasks("nlp")
 def Toxicity(dataset):
     """
validmind/tests/decorator.py

@@ -13,9 +13,9 @@ from uuid import uuid4
 
 import pandas as pd
 
+from validmind.ai.test_descriptions import get_description_metadata
 from validmind.errors import MissingRequiredTestInputError
 from validmind.logging import get_logger
-from validmind.utils import get_description_metadata
 from validmind.vm_models import (
     Metric,
     MetricResult,
validmind/tests/model_validation/FeaturesAUC.py

@@ -55,10 +55,12 @@ class FeaturesAUC(Metric):
     }
 
     def run(self):
-        x = self.inputs.dataset.x_df()
-        y = self.inputs.dataset.y_df()
+        dataset = self.inputs.dataset
+        x = dataset.x_df()
+        y = dataset.y_df()
+        n_targets = dataset.df[dataset.target_column].nunique()
 
-        if y.nunique() != 2:
+        if n_targets != 2:
             raise SkipTestError("FeaturesAUC metric requires a binary target variable.")
 
         aucs = pd.DataFrame(index=x.columns, columns=["AUC"])
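The fix counts distinct target values on the raw dataframe, likely because y_df() returns a one-column DataFrame whose nunique() is a per-column Series rather than the scalar the old comparison expected. The metric itself reduces to a per-feature roc_auc_score; a hypothetical standalone sketch on a public dataset:

    import pandas as pd
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score

    data = load_breast_cancer(as_frame=True)
    x, y = data.data, data.target

    if y.nunique() != 2:
        raise ValueError("a binary target is required")

    # AUC of each raw feature value used as a ranking score for the positive class
    aucs = pd.DataFrame(
        {"AUC": [roc_auc_score(y, x[col]) for col in x.columns]}, index=x.columns
    )
    print(aucs.sort_values("AUC", ascending=False).head())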
validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py

@@ -9,7 +9,11 @@ import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
+from validmind import tags, tasks
 
+
+@tags("visualization", "dimensionality_reduction", "embeddings")
+@tasks("text_qa", "text_generation", "text_summarization")
 def CosineSimilarityComparison(dataset, models):
     """
     Computes pairwise cosine similarities between model embeddings and visualizes the results through bar charts,
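This and the five embeddings hunks below attach tag/task metadata to function-style tests via the validmind decorators. A minimal sketch of that registration style using the same decorator API; the test body and the y_pred() helper usage are illustrative assumptions, not code from the package:

    import numpy as np
    import plotly.express as px

    from validmind import tags, tasks


    @tags("visualization", "embeddings")
    @tasks("text_qa")
    def EmbeddingNorms(dataset, model):
        """Histogram of embedding vector norms (hypothetical test)."""
        # Assumes the VM dataset exposes this model's stored outputs via y_pred()
        embeddings = np.stack(dataset.y_pred(model))
        return px.histogram(x=np.linalg.norm(embeddings, axis=1), nbins=30)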
validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py

@@ -6,7 +6,11 @@ import numpy as np
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
+from validmind import tags, tasks
 
+
+@tags("visualization", "dimensionality_reduction", "embeddings")
+@tasks("text_qa", "text_generation", "text_summarization")
 def CosineSimilarityHeatmap(
     dataset,
     model,
validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py

@@ -9,7 +9,11 @@ import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import euclidean_distances
 
+from validmind import tags, tasks
 
+
+@tags("visualization", "dimensionality_reduction", "embeddings")
+@tasks("text_qa", "text_generation", "text_summarization")
 def EuclideanDistanceComparison(dataset, models):
     """
     Computes pairwise Euclidean distances between model embeddings and visualizes the results through bar charts,
validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py

@@ -6,7 +6,11 @@ import numpy as np
 import plotly.express as px
 from sklearn.metrics.pairwise import euclidean_distances
 
+from validmind import tags, tasks
 
+
+@tags("visualization", "dimensionality_reduction", "embeddings")
+@tasks("text_qa", "text_generation", "text_summarization")
 def EuclideanDistanceHeatmap(
     dataset,
     model,
validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py

@@ -10,7 +10,11 @@ import plotly.express as px
 from sklearn.decomposition import PCA
 from sklearn.preprocessing import StandardScaler
 
+from validmind import tags, tasks
 
+
+@tags("visualization", "dimensionality_reduction", "embeddings")
+@tasks("text_qa", "text_generation", "text_summarization")
 def PCAComponentsPairwisePlots(dataset, model, n_components=3):
     """
     Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model embeddings.
validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py

@@ -10,7 +10,11 @@ import plotly.express as px
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import StandardScaler
 
+from validmind import tags, tasks
 
+
+@tags("visualization", "dimensionality_reduction", "embeddings")
+@tasks("text_qa", "text_generation", "text_summarization")
 def TSNEComponentsPairwisePlots(
     dataset,
     model,
validmind/tests/model_validation/ragas/AnswerCorrectness.py

@@ -11,7 +11,7 @@ from ragas.metrics import answer_correctness
 
 from validmind import tags, tasks
 
-from .utils import get_renamed_columns
+from .utils import get_ragas_config, get_renamed_columns
 
 
 @tags("ragas", "llm")
@@ -104,7 +104,7 @@ def AnswerCorrectness(
     df = get_renamed_columns(dataset.df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df), metrics=[answer_correctness]
+        Dataset.from_pandas(df), metrics=[answer_correctness], **get_ragas_config()
     ).to_pandas()
 
     fig_histogram = px.histogram(x=result_df["answer_correctness"].to_list(), nbins=10)
@@ -112,7 +112,7 @@ def AnswerCorrectness(
 
     return (
         {
-            "Scores": result_df[
+            "Scores (will not be uploaded to UI)": result_df[
                 ["question", "answer", "ground_truth", "answer_correctness"]
             ],
             "Aggregate Scores": [