validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +8 -1
  3. validmind/ai/utils.py +2 -1
  4. validmind/client.py +1 -0
  5. validmind/datasets/regression/fred_timeseries.py +272 -0
  6. validmind/tests/__init__.py +14 -468
  7. validmind/tests/__types__.py +10 -0
  8. validmind/tests/_store.py +102 -0
  9. validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
  10. validmind/tests/data_validation/ADF.py +8 -10
  11. validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
  12. validmind/tests/data_validation/AutoAR.py +2 -4
  13. validmind/tests/data_validation/AutoMA.py +2 -4
  14. validmind/tests/data_validation/AutoSeasonality.py +8 -10
  15. validmind/tests/data_validation/AutoStationarity.py +8 -10
  16. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
  17. validmind/tests/data_validation/BivariateHistograms.py +8 -10
  18. validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
  19. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
  20. validmind/tests/data_validation/ClassImbalance.py +2 -4
  21. validmind/tests/data_validation/DFGLSArch.py +2 -4
  22. validmind/tests/data_validation/DatasetDescription.py +7 -9
  23. validmind/tests/data_validation/DatasetSplit.py +8 -9
  24. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  25. validmind/tests/data_validation/Duplicates.py +2 -4
  26. validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
  27. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
  28. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
  29. validmind/tests/data_validation/HighCardinality.py +2 -4
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
  32. validmind/tests/data_validation/IQROutliersTable.py +2 -4
  33. validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
  34. validmind/tests/data_validation/KPSS.py +8 -10
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
  36. validmind/tests/data_validation/MissingValues.py +2 -4
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
  38. validmind/tests/data_validation/MissingValuesRisk.py +2 -4
  39. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
  40. validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
  41. validmind/tests/data_validation/RollingStatsPlot.py +2 -4
  42. validmind/tests/data_validation/ScatterPlot.py +2 -4
  43. validmind/tests/data_validation/SeasonalDecompose.py +70 -44
  44. validmind/tests/data_validation/Skewness.py +2 -4
  45. validmind/tests/data_validation/SpreadPlot.py +2 -4
  46. validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
  47. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
  48. validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
  49. validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
  50. validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
  51. validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
  52. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
  53. validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
  54. validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
  55. validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
  56. validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
  57. validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
  58. validmind/tests/data_validation/TooManyZeroValues.py +2 -4
  59. validmind/tests/data_validation/UniqueRows.py +2 -4
  60. validmind/tests/data_validation/WOEBinPlots.py +2 -4
  61. validmind/tests/data_validation/WOEBinTable.py +2 -4
  62. validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
  63. validmind/tests/data_validation/nlp/CommonWords.py +2 -4
  64. validmind/tests/data_validation/nlp/Hashtags.py +2 -4
  65. validmind/tests/data_validation/nlp/Mentions.py +2 -4
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -4
  67. validmind/tests/data_validation/nlp/StopWords.py +2 -4
  68. validmind/tests/data_validation/nlp/TextDescription.py +2 -4
  69. validmind/tests/decorator.py +10 -8
  70. validmind/tests/load.py +264 -0
  71. validmind/tests/metadata.py +59 -0
  72. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
  73. validmind/tests/model_validation/FeaturesAUC.py +6 -8
  74. validmind/tests/model_validation/ModelMetadata.py +8 -9
  75. validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
  76. validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
  77. validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
  78. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
  79. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
  80. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
  81. validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
  82. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
  83. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
  84. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
  85. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
  86. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
  87. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
  88. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
  89. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
  90. validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
  91. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
  92. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
  93. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
  94. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
  95. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
  96. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
  97. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
  98. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
  99. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
  100. validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
  101. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
  102. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
  103. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
  104. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
  105. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
  106. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
  107. validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
  108. validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
  109. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
  110. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
  111. validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
  112. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
  113. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
  114. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
  115. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
  116. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
  117. validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
  118. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
  119. validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
  120. validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
  121. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
  122. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
  123. validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
  124. validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
  125. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
  126. validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
  127. validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
  128. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
  129. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
  130. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
  132. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
  133. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
  134. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
  135. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
  136. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
  137. validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
  138. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
  139. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
  140. validmind/tests/prompt_validation/Bias.py +2 -4
  141. validmind/tests/prompt_validation/Clarity.py +2 -4
  142. validmind/tests/prompt_validation/Conciseness.py +2 -4
  143. validmind/tests/prompt_validation/Delimitation.py +2 -4
  144. validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
  145. validmind/tests/prompt_validation/Robustness.py +2 -4
  146. validmind/tests/prompt_validation/Specificity.py +2 -4
  147. validmind/tests/run.py +394 -0
  148. validmind/tests/test_providers.py +12 -0
  149. validmind/tests/utils.py +16 -0
  150. validmind/unit_metrics/__init__.py +12 -4
  151. validmind/unit_metrics/composite.py +3 -0
  152. validmind/vm_models/test/metric.py +8 -5
  153. validmind/vm_models/test/result_wrapper.py +2 -1
  154. validmind/vm_models/test/test.py +14 -11
  155. validmind/vm_models/test/threshold_test.py +1 -0
  156. validmind/vm_models/test_suite/runner.py +1 -0
  157. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
  158. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
  159. /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
  160. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
  161. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
  162. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -4,11 +4,8 @@
4
4
 
5
5
  from dataclasses import dataclass
6
6
 
7
- import matplotlib.pyplot as plt
8
7
  import pandas as pd
9
- import seaborn as sns
10
- from ydata_profiling.config import Settings
11
- from ydata_profiling.model.typeset import ProfilingTypeSet
8
+ import plotly.graph_objects as go
12
9
 
13
10
  from validmind.vm_models import (
14
11
  Figure,
@@ -65,10 +62,8 @@ class TimeSeriesOutliers(ThresholdTest):
65
62
  name = "time_series_outliers"
66
63
  required_inputs = ["dataset"]
67
64
  default_params = {"zscore_threshold": 3}
68
- metadata = {
69
- "task_types": ["regression"],
70
- "tags": ["time_series_data"],
71
- }
65
+ tasks = ["regression"]
66
+ tags = ["time_series_data"]
72
67
 
73
68
  def summary(self, results, all_passed: bool):
74
69
  """
@@ -93,7 +88,8 @@ class TimeSeriesOutliers(ThresholdTest):
93
88
  zScores = first_result.values["z-score"]
94
89
  dates = first_result.values["Date"]
95
90
  passFail = [
96
- "Pass" if z < self.params["zscore_threshold"] else "Fail" for z in zScores
91
+ "Pass" if abs(z) < self.params["zscore_threshold"] else "Fail"
92
+ for z in zScores
97
93
  ]
98
94
 
99
95
  return ResultSummary(
@@ -116,25 +112,26 @@ class TimeSeriesOutliers(ThresholdTest):
116
112
  )
117
113
 
118
114
  def run(self):
115
+ # Initialize the test_results list
116
+ test_results = []
117
+
119
118
  # Check if the index of dataframe is datetime
120
119
  is_datetime = pd.api.types.is_datetime64_any_dtype(self.inputs.dataset.df.index)
121
120
  if not is_datetime:
122
121
  raise ValueError("Dataset must be provided with datetime index")
123
122
 
124
- # Validate threshold paremeter
123
+ # Validate threshold parameter
125
124
  if "zscore_threshold" not in self.params:
126
125
  raise ValueError("zscore_threshold must be provided in params")
127
126
  zscore_threshold = self.params["zscore_threshold"]
128
127
 
129
128
  temp_df = self.inputs.dataset.df.copy()
130
129
  # temp_df = temp_df.dropna()
131
- typeset = ProfilingTypeSet(Settings())
132
- dataset_types = typeset.infer_type(temp_df)
133
- test_results = []
134
- test_figures = []
135
- num_features_columns = [
136
- k for k, v in dataset_types.items() if str(v) == "Numeric"
137
- ]
130
+
131
+ # Infer numeric columns
132
+ num_features_columns = temp_df.select_dtypes(
133
+ include=["number"]
134
+ ).columns.tolist()
138
135
 
139
136
  outliers_table = self.identify_outliers(
140
137
  temp_df[num_features_columns], zscore_threshold
@@ -196,49 +193,39 @@ class TimeSeriesOutliers(ThresholdTest):
196
193
  df (pandas.DataFrame): Input data with time series.
197
194
  outliers_table (pandas.DataFrame): DataFrame with identified outliers.
198
195
  Returns:
199
- matplotlib.figure.Figure: A matplotlib figure object with subplots for each variable.
196
+ list: A list of Figure objects, one per variable (column) in the input data.
200
197
  """
201
- sns.set(style="darkgrid")
202
- columns = list(self.inputs.dataset.df.columns)
203
198
  figures = []
204
199
 
205
- for col in columns:
206
- plt.figure()
207
- fig, _ = plt.subplots()
208
- column_index_name = df.index.name
209
- ax = sns.lineplot(data=df.reset_index(), x=column_index_name, y=col)
200
+ for col in df.columns:
201
+ fig = go.Figure()
202
+
203
+ fig.add_trace(go.Scatter(x=df.index, y=df[col], mode="lines", name=col))
210
204
 
211
205
  if not outliers_table.empty:
212
206
  variable_outliers = outliers_table[outliers_table["Variable"] == col]
213
- for idx, row in variable_outliers.iterrows():
214
- date = row["Date"]
215
- outlier_value = df.loc[date, col]
216
- ax.scatter(
217
- date,
218
- outlier_value,
219
- marker="o",
220
- s=100,
221
- c="red",
222
- label="Outlier" if idx == 0 else "",
207
+ fig.add_trace(
208
+ go.Scatter(
209
+ x=variable_outliers["Date"],
210
+ y=df.loc[variable_outliers["Date"], col],
211
+ mode="markers",
212
+ marker=dict(color="red", size=10),
213
+ name="Outlier",
223
214
  )
215
+ )
224
216
 
225
- plt.xticks(fontsize=18)
226
- plt.yticks(fontsize=18)
227
- ax.set_xlabel("")
228
- ax.set_ylabel("")
229
- ax.set_title(
230
- f"Time Series with Outliers for {col}", weight="bold", fontsize=20
217
+ fig.update_layout(
218
+ title=f"Time Series with Outliers for {col}",
219
+ xaxis_title="Date",
220
+ yaxis_title=col,
231
221
  )
232
222
 
233
- ax.legend()
234
223
  figures.append(
235
224
  Figure(
236
225
  for_object=self,
237
- key=f"{self.name}:{col}",
226
+ key=f"{self.name}:{col}_{self.inputs.dataset.input_id}",
238
227
  figure=fig,
239
228
  )
240
229
  )
241
230
 
242
- # Do this if you want to prevent the figure from being displayed
243
- plt.close("all")
244
231
  return figures
@@ -70,10 +70,8 @@ class TooManyZeroValues(ThresholdTest):
70
70
  required_inputs = ["dataset"]
71
71
  default_params = {"max_percent_threshold": 0.03}
72
72
 
73
- metadata = {
74
- "task_types": ["regression", "classification"],
75
- "tags": ["tabular_data"],
76
- }
73
+ tasks = ["regression", "classification"]
74
+ tags = ["tabular_data"]
77
75
 
78
76
  def summary(self, results: List[ThresholdTestResult], all_passed: bool):
79
77
  """
@@ -57,10 +57,8 @@ class UniqueRows(ThresholdTest):
57
57
  required_inputs = ["dataset"]
58
58
  default_params = {"min_percent_threshold": 1}
59
59
 
60
- metadata = {
61
- "task_types": ["regression", "classification"],
62
- "tags": ["tabular_data"],
63
- }
60
+ tasks = ["regression", "classification"]
61
+ tags = ["tabular_data"]
64
62
 
65
63
  def summary(self, results: List[ThresholdTestResult], all_passed: bool):
66
64
  """
@@ -60,10 +60,8 @@ class WOEBinPlots(Metric):
60
60
  name = "woe_bin_plots"
61
61
  required_inputs = ["dataset"]
62
62
  default_params = {"breaks_adj": None, "fig_height": 600, "fig_width": 500}
63
- metadata = {
64
- "task_types": ["classification"],
65
- "tags": ["tabular_data", "visualization", "categorical_data"],
66
- }
63
+ tasks = ["classification"]
64
+ tags = ["tabular_data", "visualization", "categorical_data"]
67
65
 
68
66
  def run(self):
69
67
  df = self.inputs.dataset.df
@@ -48,10 +48,8 @@ class WOEBinTable(Metric):
48
48
  name = "woe_bin_table"
49
49
  required_inputs = ["dataset"]
50
50
  default_params = {"breaks_adj": None}
51
- metadata = {
52
- "task_types": ["classification"],
53
- "tags": ["tabular_data", "categorical_data"],
54
- }
51
+ tasks = ["classification"]
52
+ tags = ["tabular_data", "categorical_data"]
55
53
 
56
54
  def run(self):
57
55
  target_column = self.inputs.dataset.target_column
@@ -51,10 +51,8 @@ class ZivotAndrewsArch(Metric):
51
51
 
52
52
  name = "zivot_andrews"
53
53
  required_inputs = ["dataset"]
54
- metadata = {
55
- "task_types": ["regression"],
56
- "tags": ["time_series_data", "stationarity", "unit_root_test"],
57
- }
54
+ tasks = ["regression"]
55
+ tags = ["time_series_data", "stationarity", "unit_root_test"]
58
56
 
59
57
  def run(self):
60
58
  """
@@ -53,10 +53,8 @@ class CommonWords(Metric):
53
53
 
54
54
  name = "common_words"
55
55
  required_inputs = ["dataset"]
56
- metadata = {
57
- "task_types": ["text_classification", "text_summarization"],
58
- "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
59
- }
56
+ tasks = ["text_classification", "text_summarization"]
57
+ tags = ["nlp", "text_data", "visualization", "frequency_analysis"]
60
58
 
61
59
  def run(self):
62
60
  # Can only run this test if we have a Dataset object
@@ -56,10 +56,8 @@ class Hashtags(ThresholdTest):
56
56
  name = "hashtags"
57
57
  required_inputs = ["dataset"]
58
58
  default_params = {"top_hashtags": 25}
59
- metadata = {
60
- "task_types": ["text_classification", "text_summarization"],
61
- "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
62
- }
59
+ tasks = ["text_classification", "text_summarization"]
60
+ tags = ["nlp", "text_data", "visualization", "frequency_analysis"]
63
61
 
64
62
  def run(self):
65
63
  # Can only run this test if we have a Dataset object
@@ -56,10 +56,8 @@ class Mentions(ThresholdTest):
56
56
 
57
57
  required_inputs = ["dataset"]
58
58
  default_params = {"top_mentions": 25}
59
- metadata = {
60
- "task_types": ["text_classification", "text_summarization"],
61
- "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
62
- }
59
+ tasks = ["text_classification", "text_summarization"]
60
+ tags = ["nlp", "text_data", "visualization", "frequency_analysis"]
63
61
 
64
62
  def run(self):
65
63
  # Can only run this test if we have a Dataset object
@@ -52,10 +52,8 @@ class Punctuations(Metric):
52
52
 
53
53
  name = "punctuations"
54
54
  required_inputs = ["dataset"]
55
- metadata = {
56
- "task_types": ["text_classification", "text_summarization"],
57
- "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
58
- }
55
+ tasks = ["text_classification", "text_summarization"]
56
+ tags = ["nlp", "text_data", "visualization", "frequency_analysis"]
59
57
 
60
58
  def run(self):
61
59
  # Can only run this test if we have a Dataset object
@@ -71,10 +71,8 @@ class StopWords(ThresholdTest):
71
71
  name = "stop_words"
72
72
  required_inputs = ["dataset"]
73
73
  default_params = {"min_percent_threshold": 0.5, "num_words": 25}
74
- metadata = {
75
- "task_types": ["text_classification", "text_summarization"],
76
- "tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
77
- }
74
+ tasks = ["text_classification", "text_summarization"]
75
+ tags = ["nlp", "text_data", "visualization", "frequency_analysis"]
78
76
 
79
77
  def summary(self, results: List[ThresholdTestResult], all_passed: bool):
80
78
  # Create a DataFrame from the data
@@ -79,10 +79,8 @@ class TextDescription(Metric):
79
79
  "num_top_words": 3,
80
80
  "lang": "english",
81
81
  }
82
- metadata = {
83
- "task_types": ["text_classification", "text_summarization"],
84
- "tags": ["nlp", "text_data", "visualization"],
85
- }
82
+ tasks = ["text_classification", "text_summarization"]
83
+ tags = ["nlp", "text_data", "visualization"]
86
84
 
87
85
  def general_text_metrics(self, df, text_column):
88
86
  nltk.download("punkt", quiet=True)
@@ -31,6 +31,8 @@ from validmind.vm_models.figure import (
31
31
  )
32
32
  from validmind.vm_models.test.result_wrapper import MetricResultWrapper
33
33
 
34
+ from ._store import test_store
35
+
34
36
  logger = get_logger(__name__)
35
37
 
36
38
 
@@ -56,7 +58,9 @@ def _inspect_signature(test_func: callable):
56
58
  return inputs, params
57
59
 
58
60
 
59
- def _build_result(results, test_id, description, output_template, inputs): # noqa: C901
61
+ def _build_result( # noqa: C901
62
+ results, test_id, description, output_template, inputs, generate_description=True
63
+ ):
60
64
  ref_id = str(uuid4())
61
65
  figure_metadata = {
62
66
  "_type": "metric",
@@ -131,6 +135,7 @@ def _build_result(results, test_id, description, output_template, inputs): # no
131
135
  default_description=description,
132
136
  summary=result_summary.serialize(),
133
137
  figures=figures,
138
+ should_generate=generate_description,
134
139
  )
135
140
  ],
136
141
  inputs=inputs,
@@ -159,6 +164,7 @@ def _get_run_method(func, inputs, params):
159
164
  description=inspect.getdoc(self),
160
165
  output_template=self.output_template,
161
166
  inputs=self.get_accessed_inputs(),
167
+ generate_description=self.generate_description,
162
168
  )
163
169
 
164
170
  return self.result
@@ -265,8 +271,6 @@ def test(func_or_id):
265
271
  The decorated function.
266
272
  """
267
273
 
268
- from . import _register_custom_test
269
-
270
274
  def decorator(func):
271
275
  test_id = func_or_id or f"validmind.custom_metrics.{func.__name__}"
272
276
 
@@ -283,13 +287,11 @@ def test(func_or_id):
283
287
  "required_inputs": list(inputs.keys()),
284
288
  "default_params": {k: v["default"] for k, v in params.items()},
285
289
  "__doc__": description,
286
- "metadata": {
287
- "task_types": tasks,
288
- "tags": tags,
289
- },
290
+ "tasks": tasks,
291
+ "tags": tags,
290
292
  },
291
293
  )
292
- _register_custom_test(test_id, metric_class)
294
+ test_store.register_custom_test(test_id, metric_class)
293
295
 
294
296
  # special function to allow the function to be saved to a file
295
297
  func.save = _get_save_func(func, test_id)
@@ -0,0 +1,264 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ """Module for listing and loading tests."""
6
+
7
+ import importlib
8
+ import inspect
9
+ import json
10
+ import sys
11
+ from pathlib import Path
12
+ from pprint import pformat
13
+ from uuid import uuid4
14
+
15
+ import pandas as pd
16
+ from ipywidgets import HTML, Accordion
17
+
18
+ from ..errors import LoadTestError
19
+ from ..html_templates.content_blocks import test_content_block_html
20
+ from ..logging import get_logger
21
+ from ..unit_metrics.composite import load_composite_metric
22
+ from ..utils import (
23
+ NumpyEncoder,
24
+ display,
25
+ format_dataframe,
26
+ fuzzy_match,
27
+ md_to_html,
28
+ test_id_to_name,
29
+ )
30
+ from .__types__ import TestID
31
+ from ._store import test_provider_store, test_store
32
+ from .decorator import test as test_decorator
33
+ from .utils import test_description
34
+
35
+ logger = get_logger(__name__)
36
+
37
+
38
+ def __init__():
39
+ directories = [p.name for p in Path(__file__).parent.iterdir() if p.is_dir()]
40
+
41
+ for d in directories:
42
+ for path in Path(__file__).parent.joinpath(d).glob("**/**/*.py"):
43
+ if path.name.startswith("__") or not path.name[0].isupper():
44
+ continue # skip __init__.py and other special files as well as non Test files
45
+ test_id = (
46
+ f"validmind.{d}.{path.parent.stem}.{path.stem}"
47
+ if path.parent.parent.stem == d
48
+ else f"validmind.{d}.{path.stem}"
49
+ )
50
+ test_store.register_test(test_id)
51
+
52
+
53
+ __init__()
54
+
55
+
56
+ def _pretty_list_tests(tests, truncate=True):
57
+ table = [
58
+ {
59
+ "ID": test_id,
60
+ "Name": test_id_to_name(test_id),
61
+ "Description": test_description(test, truncate),
62
+ "Required Inputs": test.required_inputs,
63
+ "Params": test.default_params or {},
64
+ }
65
+ for test_id, test in tests.items()
66
+ ]
67
+
68
+ return format_dataframe(pd.DataFrame(table))
69
+
70
+
71
+ def list_tests(
72
+ filter=None, task=None, tags=None, pretty=True, truncate=True, __as_class=False
73
+ ):
74
+ """List all tests in the tests directory.
75
+
76
+ Args:
77
+ filter (str, optional): Find tests where the ID, tasks or tags match the
78
+ filter string. Defaults to None.
79
+ task (str, optional): Find tests that match the task. Can be used to
80
+ narrow down matches from the filter string. Defaults to None.
81
+ tags (list, optional): Find tests that match list of tags. Can be used to
82
+ narrow down matches from the filter string. Defaults to None.
83
+ pretty (bool, optional): If True, returns a pandas DataFrame with a
84
+ formatted table. Defaults to True.
85
+ truncate (bool, optional): If True, truncates the test description to the first
86
+ line. Defaults to True. (only used if pretty=True)
87
+
88
+ Returns:
89
+ list or pandas.DataFrame: A list of all tests or a formatted table.
90
+ """
91
+ tests = {
92
+ test_id: load_test(test_id, reload=True)
93
+ for test_id in test_store.get_test_ids()
94
+ }
95
+
96
+ # first search by the filter string since it's the most general search
97
+ if filter is not None:
98
+ tests = {
99
+ test_id: test
100
+ for test_id, test in tests.items()
101
+ if filter.lower() in test_id.lower()
102
+ or any(filter.lower() in task.lower() for task in test.tasks)
103
+ or any(fuzzy_match(tag, filter.lower()) for tag in test.tags)
104
+ }
105
+
106
+ # then filter by task type and tags since they are more specific
107
+ if task is not None:
108
+ tests = {test_id: test for test_id, test in tests.items() if task in test.tasks}
109
+
110
+ if tags is not None:
111
+ tests = {
112
+ test_id: test
113
+ for test_id, test in tests.items()
114
+ if all(tag in test.tags for tag in tags)
115
+ }
116
+
117
+ if __as_class:
118
+ return list(tests.values())
119
+
120
+ if not pretty:
121
+ # only return test ids
122
+ return list(tests.keys())
123
+
124
+ return _pretty_list_tests(tests, truncate=truncate)
125
+
126
+
127
+ def _load_validmind_test(test_id, reload=False):
128
+ parts = test_id.split(":")[0].split(".")
129
+
130
+ test_module = ".".join(parts[1:-1])
131
+ test_class = parts[-1]
132
+
133
+ error = None
134
+ test = None
135
+
136
+ try:
137
+ full_path = f"validmind.tests.{test_module}.{test_class}"
138
+
139
+ if reload and full_path in sys.modules:
140
+ module = importlib.reload(sys.modules[full_path])
141
+ else:
142
+ module = importlib.import_module(full_path)
143
+
144
+ test = getattr(module, test_class)
145
+ except ModuleNotFoundError as e:
146
+ error = f"Unable to load test {test_id}. {e}"
147
+ except AttributeError:
148
+ error = f"Unable to load test {test_id}. Test not in module: {test_class}"
149
+
150
+ return error, test
151
+
152
+
153
+ def load_test(test_id: str, reload=False):
154
+ """Load a test by test ID
155
+
156
+ Test IDs are in the format `namespace.path_to_module.TestClassOrFuncName[:result_id]`.
157
+ The result ID is optional and is used to distinguish between multiple results from
158
+ running the same test.
159
+
160
+ Args:
161
+ test_id (str): The test ID in the format `namespace.path_to_module.TestName[:result_id]`
162
+ reload (bool, optional): Whether to reload the test module. Defaults to False.
163
+ """
164
+ # TODO: we should use a dedicated class for test IDs to handle this consistently
165
+ test_id, result_id = test_id.split(":", 1) if ":" in test_id else (test_id, None)
166
+
167
+ error = None
168
+ namespace = test_id.split(".", 1)[0]
169
+
170
+ # TODO: let's implement an extensible loading system instead of this ugly if/else
171
+ if test_store.get_custom_test(test_id):
172
+ test = test_store.get_custom_test(test_id)
173
+
174
+ elif test_id.startswith("validmind.composite_metric"):
175
+ error, test = load_composite_metric(test_id)
176
+
177
+ elif namespace == "validmind":
178
+ error, test = _load_validmind_test(test_id, reload=reload)
179
+
180
+ elif test_provider_store.has_test_provider(namespace):
181
+ provider = test_provider_store.get_test_provider(namespace)
182
+
183
+ try:
184
+ test = provider.load_test(test_id.split(".", 1)[1])
185
+ except Exception as e:
186
+ error = (
187
+ f"Unable to load test {test_id} from test provider: "
188
+ f"{provider}\n Got Exception: {e}"
189
+ )
190
+
191
+ else:
192
+ error = f"Unable to load test {test_id}. No test provider found."
193
+
194
+ if error:
195
+ logger.error(error)
196
+ raise LoadTestError(error)
197
+
198
+ if inspect.isfunction(test):
199
+ # if it's a function, we decorate it and then load the class
200
+ # TODO: simplify this as we move towards all functional metrics
201
+ # "_" is used here so it doesn't conflict with other test ids
202
+ test_decorator("_")(test)
203
+ test = test_store.get_custom_test("_")
204
+
205
+ test.test_id = f"{test_id}:{result_id}" if result_id else test_id
206
+
207
+ return test
208
+
209
+
210
+ def describe_test(test_id: TestID = None, raw: bool = False, show: bool = True):
211
+ """Get or show details about the test
212
+
213
+ This function can be used to see test details including the test name, description,
214
+ required inputs and default params. It can also be used to get a dictionary of the
215
+ above information for programmatic use.
216
+
217
+ Args:
218
+ test_id (str, optional): The test ID. Defaults to None.
219
+ raw (bool, optional): If True, returns a dictionary with the test details.
220
+ Defaults to False.
221
+ """
222
+ test = load_test(test_id, reload=True)
223
+
224
+ details = {
225
+ "ID": test_id,
226
+ "Name": test_id_to_name(test_id),
227
+ "Required Inputs": test.required_inputs or [],
228
+ "Params": test.default_params or {},
229
+ "Description": inspect.getdoc(test).strip() or "",
230
+ }
231
+
232
+ if raw:
233
+ return details
234
+
235
+ html = test_content_block_html.format(
236
+ test_id=test_id,
237
+ uuid=str(uuid4()),
238
+ title=f'{details["Name"]}',
239
+ description=md_to_html(details["Description"].strip()),
240
+ required_inputs=", ".join(details["Required Inputs"] or ["None"]),
241
+ params_table="\n".join(
242
+ [
243
+ f"<tr><td>{param}</td><td>{pformat(value, indent=4)}</td></tr>"
244
+ for param, value in details["Params"].items()
245
+ ]
246
+ ),
247
+ table_display="table" if details["Params"] else "none",
248
+ example_inputs=json.dumps(
249
+ {name: f"my_vm_{name}" for name in (details["Required Inputs"] or [])},
250
+ indent=4,
251
+ ),
252
+ example_params=json.dumps(details["Params"] or {}, indent=4, cls=NumpyEncoder),
253
+ instructions_display="block" if show else "none",
254
+ )
255
+
256
+ if not show:
257
+ return html
258
+
259
+ display(
260
+ Accordion(
261
+ children=[HTML(html)],
262
+ titles=[f"Test Description: {details['Name']} ('{test_id}')"],
263
+ )
264
+ )
@@ -0,0 +1,59 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import pandas as pd
6
+
7
+ from validmind.utils import format_dataframe
8
+
9
+ from .load import list_tests
10
+
11
+
12
+ def list_tags():
13
+ """
14
+ List unique tags from all test classes.
15
+ """
16
+
17
+ unique_tags = set()
18
+
19
+ for test in list_tests(__as_class=True):
20
+ unique_tags.update(test.tags)
21
+
22
+ return list(unique_tags)
23
+
24
+
25
+ def list_tasks_and_tags():
26
+ """
27
+ List all task types and their associated tags, with one row per task type and
28
+ all tags for a task type in one row.
29
+
30
+ Returns:
31
+ pandas.DataFrame: A DataFrame with 'Task' and concatenated 'Tags' columns.
32
+ """
33
+ task_tags_dict = {}
34
+
35
+ for test in list_tests(__as_class=True):
36
+ for task in test.tasks:
37
+ task_tags_dict.setdefault(task, set()).update(test.tags)
38
+
39
+ return format_dataframe(
40
+ pd.DataFrame(
41
+ [
42
+ {"Task": task, "Tags": ", ".join(tags)}
43
+ for task, tags in task_tags_dict.items()
44
+ ]
45
+ )
46
+ )
47
+
48
+
49
+ def list_tasks():
50
+ """
51
+ List unique tasks from all test classes.
52
+ """
53
+
54
+ unique_tasks = set()
55
+
56
+ for test in list_tests(__as_class=True):
57
+ unique_tasks.update(test.tasks)
58
+
59
+ return list(unique_tasks)