validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. validmind/__init__.py +6 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +193 -0
  4. validmind/api_client.py +45 -31
  5. validmind/client.py +33 -6
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/credit_risk/__init__.py +11 -0
  8. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  9. validmind/datasets/credit_risk/lending_club.py +394 -0
  10. validmind/datasets/nlp/__init__.py +5 -0
  11. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  12. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  13. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  14. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  15. validmind/errors.py +11 -1
  16. validmind/logging.py +9 -2
  17. validmind/models/huggingface.py +2 -2
  18. validmind/models/pytorch.py +3 -3
  19. validmind/models/sklearn.py +4 -4
  20. validmind/template.py +2 -2
  21. validmind/test_suites/__init__.py +4 -2
  22. validmind/tests/__init__.py +130 -45
  23. validmind/tests/data_validation/DatasetDescription.py +0 -1
  24. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  25. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  26. validmind/tests/data_validation/ScatterPlot.py +8 -2
  27. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  28. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  29. validmind/tests/decorator.py +313 -0
  30. validmind/tests/model_validation/BertScore.py +1 -1
  31. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  32. validmind/tests/model_validation/BleuScore.py +1 -1
  33. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  34. validmind/tests/model_validation/ContextualRecall.py +1 -1
  35. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  36. validmind/tests/model_validation/MeteorScore.py +92 -0
  37. validmind/tests/model_validation/RegardHistogram.py +6 -7
  38. validmind/tests/model_validation/RegardScore.py +4 -6
  39. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  40. validmind/tests/model_validation/RougeMetrics.py +7 -5
  41. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  42. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  43. validmind/tests/model_validation/TokenDisparity.py +1 -1
  44. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  45. validmind/tests/model_validation/ToxicityScore.py +1 -1
  46. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  47. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  48. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  49. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  50. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
  51. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  52. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  53. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  54. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  55. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  56. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  57. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  58. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  59. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  60. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  61. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  62. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  63. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
  64. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  65. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
  66. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  67. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  68. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  69. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  70. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  72. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  73. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  74. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  75. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
  76. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  77. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  78. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  79. validmind/tests/test_providers.py +14 -124
  80. validmind/unit_metrics/__init__.py +75 -70
  81. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  82. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  83. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  84. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  85. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  86. validmind/unit_metrics/composite.py +228 -0
  87. validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
  88. validmind/unit_metrics/regression/HuberLoss.py +23 -0
  89. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
  90. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
  91. validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
  92. validmind/unit_metrics/regression/QuantileLoss.py +15 -0
  93. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
  94. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
  95. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
  96. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
  97. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
  98. validmind/utils.py +20 -31
  99. validmind/vm_models/__init__.py +0 -2
  100. validmind/vm_models/dataset.py +623 -29
  101. validmind/vm_models/figure.py +52 -17
  102. validmind/vm_models/test/metric.py +33 -31
  103. validmind/vm_models/test/output_template.py +0 -27
  104. validmind/vm_models/test/result_wrapper.py +68 -36
  105. validmind/vm_models/test/test.py +4 -2
  106. validmind/vm_models/test/threshold_test.py +24 -14
  107. validmind/vm_models/test_context.py +7 -0
  108. validmind/vm_models/test_suite/runner.py +1 -1
  109. validmind/vm_models/test_suite/summary.py +3 -3
  110. validmind/vm_models/test_suite/test.py +1 -1
  111. validmind/vm_models/test_suite/test_suite.py +2 -1
  112. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
  113. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
  114. validmind-2.1.0.dist-info/entry_points.txt +3 -0
  115. validmind/tests/__types__.py +0 -62
  116. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  117. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  118. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  119. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  120. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
  121. validmind/unit_metrics/sklearn/classification/F1.py +0 -22
  122. validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
  123. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
  124. validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
  125. validmind/vm_models/test/unit_metric.py +0 -88
  126. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  127. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0

validmind/vm_models/figure.py

@@ -21,6 +21,18 @@ from ..errors import InvalidFigureForObjectError, UnsupportedFigureError
 from ..utils import get_full_typename
 
 
+def is_matplotlib_figure(figure) -> bool:
+    return isinstance(figure, matplotlib.figure.Figure)
+
+
+def is_plotly_figure(figure) -> bool:
+    return isinstance(figure, (go.Figure, go.FigureWidget))
+
+
+def is_png_image(figure) -> bool:
+    return isinstance(figure, bytes)
+
+
 @dataclass
 class Figure:
     """
@@ -52,22 +64,10 @@ class Figure:
         if (
             not client_config.running_on_colab
             and self.figure
-            and self.is_plotly_figure()
+            and is_plotly_figure(self.figure)
         ):
             self.figure = go.FigureWidget(self.figure)
 
-    def is_matplotlib_figure(self) -> bool:
-        """
-        Returns True if the figure is a matplotlib figure
-        """
-        return isinstance(self.figure, matplotlib.figure.Figure)
-
-    def is_plotly_figure(self) -> bool:
-        """
-        Returns True if the figure is a plotly figure
-        """
-        return isinstance(self.figure, (go.Figure, go.FigureWidget))
-
     def _get_for_object_type(self):
         """
         Returns the type of the object this figure is for
@@ -91,7 +91,7 @@ class Figure:
         we would render images as-is, but Plotly FigureWidgets don't work well
         on Google Colab when they are combined with ipywidgets.
         """
-        if self.is_matplotlib_figure():
+        if is_matplotlib_figure(self.figure):
             tmpfile = BytesIO()
             self.figure.savefig(tmpfile, format="png")
             encoded = base64.b64encode(tmpfile.getvalue()).decode("utf-8")
@@ -101,7 +101,7 @@
                 """
             )
 
-        elif self.is_plotly_figure():
+        elif is_plotly_figure(self.figure):
             # FigureWidget can be displayed as-is but not on Google Colab. In this case
             # we just return the image representation of the figure.
             if client_config.running_on_colab:
@@ -114,6 +114,15 @@
                 )
             else:
                 return self.figure
+
+        elif is_png_image(self.figure):
+            encoded = base64.b64encode(self.figure).decode("utf-8")
+            return widgets.HTML(
+                value=f"""
+                <img style="width:100%; height: auto;" src="data:image/png;base64,{encoded}"/>
+                """
+            )
+
         else:
             raise UnsupportedFigureError(
                 f"Figure type {type(self.figure)} not supported for plotting"
@@ -129,15 +138,38 @@
             "metadata": json.dumps(self.metadata, allow_nan=False),
         }
 
+    def _get_b64_url(self):
+        """
+        Returns a base64 encoded URL for the figure
+        """
+        if is_matplotlib_figure(self.figure):
+            buffer = BytesIO()
+            self.figure.savefig(buffer, format="png")
+            buffer.seek(0)
+
+            b64_data = base64.b64encode(buffer.read()).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
+        elif is_plotly_figure(self.figure):
+            bytes = self.figure.to_image(format="png")
+            b64_data = base64.b64encode(bytes).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
+        raise UnsupportedFigureError(
+            f"Unrecognized figure type: {get_full_typename(self.figure)}"
+        )
+
     def serialize_files(self):
         """Creates a `requests`-compatible files object to be sent to the API"""
-        if self.is_matplotlib_figure():
+        if is_matplotlib_figure(self.figure):
             buffer = BytesIO()
             self.figure.savefig(buffer, bbox_inches="tight")
             buffer.seek(0)
             return {"image": (f"{self.key}.png", buffer, "image/png")}
 
-        elif self.is_plotly_figure():
+        elif is_plotly_figure(self.figure):
            # When using plotly, we need to use we will produce two files:
            # - a JSON file that will be used to display the figure in the UI
            # - a PNG file that will be used to display the figure in documents
@@ -154,6 +186,9 @@
                 ),
             }
 
+        elif is_png_image(self.figure):
+            return {"image": (f"{self.key}.png", self.figure, "image/png")}
+
         raise UnsupportedFigureError(
             f"Unrecognized figure type: {get_full_typename(self.figure)}"
         )
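
Note: the figure type checks move from `Figure` instance methods to module-level helpers, and raw PNG bytes are now accepted as a figure payload. A minimal sketch of exercising the new helpers (assuming they remain importable from `validmind.vm_models.figure`, as the diff suggests):

    import matplotlib.pyplot as plt
    import plotly.graph_objects as go

    from validmind.vm_models.figure import (
        is_matplotlib_figure,
        is_plotly_figure,
        is_png_image,
    )

    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [2, 4, 6])

    assert is_matplotlib_figure(fig)            # matplotlib.figure.Figure
    assert is_plotly_figure(go.Figure())        # go.Figure or go.FigureWidget
    assert is_png_image(b"\x89PNG\r\n\x1a\n")   # any bytes value is treated as a PNG image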

validmind/vm_models/test/metric.py

@@ -6,14 +6,15 @@
 Class for storing ValidMind metric objects and associated
 data for display and reporting purposes
 """
+import os
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
+from ...ai import generate_description
 from ...errors import MissingCacheResultsArgumentsError
-from ...utils import clean_docstring
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -74,41 +75,42 @@ class Metric(Test):
                 "Metric must provide a metric value or figures to cache_results"
             )
 
-        # At a minimum, send the metric description
-        result_metadata = [
-            {
-                "content_id": f"metric_description:{self.test_id}",
-                "text": clean_docstring(self.description()),
-            }
-        ]
-
-        result_summary = self.summary(metric_value)
-
-        result_wrapper = MetricResultWrapper(
-            result_id=self.test_id,
-            result_metadata=result_metadata,
-            inputs=self.get_accessed_inputs(),
-            output_template=self.output_template,
-        )
-
-        # We can send an empty result to push an empty metric with a summary and plots
-        metric_result_value = metric_value if metric_value is not None else {}
-
-        result_wrapper.metric = MetricResult(
-            # key=self.key,
-            # Now using the fully qualified test ID as `key`.
-            # Ideally the backend is updated to use `test_id` instead of `key`.
+        metric = MetricResult(
             key=self.test_id,
             ref_id=self._ref_id,
-            value=metric_result_value,
+            value=metric_value if metric_value is not None else {},
             value_formatter=self.value_formatter,
-            summary=result_summary,
+            summary=self.summary(metric_value),
         )
 
-        # Allow metrics to attach figures to the test suite result
-        if figures:
-            result_wrapper.figures = figures
+        if (
+            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
+            == "true"
+        ):
+            revision_name = "Generated by ValidMind AI"
+            description = generate_description(
+                test_name=self.test_id,
+                test_description=self.description().splitlines()[0],
+                test_results=metric.serialize()["value"],
+                test_summary=metric.serialize()["summary"],
+                figures=figures,
+            )
+        else:
+            revision_name = "Default Description"
+            description = self.description()
+
+        description_metadata = {
+            "content_id": f"metric_description:{self.test_id}::{revision_name}",
+            "text": description,
+        }
 
-        self.result = result_wrapper
+        self.result = MetricResultWrapper(
+            result_id=self.test_id,
+            result_metadata=[description_metadata],
+            metric=metric,
+            figures=figures,
+            inputs=self.get_accessed_inputs(),
+            output_template=self.output_template,
+        )
 
         return self.result
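
For context: `Metric.cache_results` now keys the description metadata on a revision name and can delegate the description text to `validmind.ai.generate_description`. The switch is driven entirely by the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable shown above; a hedged sketch of toggling it (only the variable name and revision labels come from the diff, the rest is illustrative):

    import os

    # Opt in: descriptions are produced by generate_description() and logged
    # under the "Generated by ValidMind AI" revision name.
    os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"] = "true"

    # ... run metrics / test suites as usual ...

    # Opt out (or leave unset): the test docstring is used as the
    # "Default Description" revision, matching the 2.0.x behaviour.
    os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"] = "false"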

validmind/vm_models/test/output_template.py

@@ -4,7 +4,6 @@
 
 from datetime import datetime
 
-from bs4 import BeautifulSoup
 from dateutil import parser
 from jinja2 import Environment
 
@@ -54,29 +53,3 @@ class OutputTemplate:
             value=value,
             metric_history=values_history,
         )
-
-    def parse_summary_from_html(rendered_template_html):
-        soup = BeautifulSoup(rendered_template_html, "html.parser")
-
-        # find all `<table>` elements
-        tables = soup.find_all("table")
-        tables_data = []
-
-        for table in tables:
-            headers = [cell.text for cell in table.find_all("th")]
-
-            tables_data.append(
-                {
-                    "type": "table",
-                    "data": [
-                        {
-                            headers[i]: cell.text
-                            for i, cell in enumerate(row.find_all("td"))
-                        }
-                        for row in table.find("tbody").find_all("tr")
-                    ],
-                    "metadata": {"title": ""},  # TODO: add title
-                }
-            )
-
-        return tables_data

validmind/vm_models/test/result_wrapper.py

@@ -10,14 +10,15 @@ import json
 import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Dict, List, Optional, Union
 
 import ipywidgets as widgets
-import markdown
+import mistune
 import pandas as pd
 from IPython.display import display
 
 from ... import api_client
+from ...ai import DescriptionFuture
 from ...utils import NumpyEncoder, run_async, test_id_to_name
 from ..figure import Figure
 from .metric_result import MetricResult
@@ -26,24 +27,33 @@ from .result_summary import ResultSummary
 from .threshold_test_result import ThresholdTestResults
 
 
-async def update_metadata(content_id: str, text: str) -> None:
+async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
     """
     Update the metadata of a content item. By default we don't
     override the existing metadata, but we can override it by
     setting the VM_OVERRIDE_METADATA environment variable to True
     """
-    VM_OVERRIDE_METADATA = os.environ.get("VM_OVERRIDE_METADATA", False)
-    try:
-        existing_metadata = await api_client.get_metadata(content_id)
-    except Exception:
-        existing_metadata = None  # TODO: handle this better
+    should_update = False
 
-    if (
-        existing_metadata is None
-        or VM_OVERRIDE_METADATA == "True"
-        or VM_OVERRIDE_METADATA is True
-    ):
-        await api_client.log_metadata(content_id, text)
+    # check if the env variable is set to force overwriting metadata
+    if os.environ.get("VM_OVERRIDE_METADATA", "false").lower() == "true":
+        should_update = True
+
+    # if not set, check if the content_id is a composite metric def
+    if not should_update and content_id.startswith("composite_metric_def:"):
+        # we always want composite metric definitions to be updated
+        should_update = True
+
+    # if not set, lets check if the metadata already exists
+    if not should_update:
+        try:
+            await api_client.get_metadata(content_id)
+        except Exception:  # TODO: this shouldn't be a catch-all
+            # if the metadata doesn't exist, we should create (update) it
+            should_update = True
+
+    if should_update:
+        await api_client.log_metadata(content_id, text, _json)
 
 
 def plot_figures(figures: List[Figure]) -> None:
@@ -93,8 +103,7 @@ class ResultWrapper(ABC):
         """
         Convert a markdown string to html
         """
-
-        return markdown.markdown(description, extensions=["markdown.extensions.tables"])
+        return mistune.html(description)
 
     def _summary_tables_to_widget(self, summary: ResultSummary):
         """
@@ -111,21 +120,19 @@ class ResultWrapper(ABC):
                 [
                     {
                         "selector": "",
-                        "props": [
-                            ("width", "100%"),
-                        ],
+                        "props": [("width", "100%")],
+                    },
+                    {
+                        "selector": "th",
+                        "props": [("text-align", "left")],
                     },
                     {
                         "selector": "tbody tr:nth-child(even)",
-                        "props": [
-                            ("background-color", "#FFFFFF"),
-                        ],
+                        "props": [("background-color", "#FFFFFF")],
                    },
                     {
                         "selector": "tbody tr:nth-child(odd)",
-                        "props": [
-                            ("background-color", "#F5F5F5"),
-                        ],
+                        "props": [("background-color", "#F5F5F5")],
                     },
                     {
                         "selector": "td, th",
@@ -135,7 +142,8 @@
                         ],
                     },
                 ]
-            )  # add borders
+            )
+            .set_properties(**{"text-align": "left"})
             .to_html(escape=False)
         )  # table.data is an orient=records dump
 
@@ -155,7 +163,7 @@
 
     def log(self):
         """Log the result... May be overridden by subclasses"""
-        return run_async(self.log_async)
+        run_async(self.log_async)
 
 
 @dataclass
@@ -207,15 +215,19 @@ class MetricResultWrapper(ResultWrapper):
         if self.metric and self.metric.key == "dataset_description":
             return ""
 
-        vbox_children = []
+        vbox_children = [
+            widgets.HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
+        ]
 
         if self.result_metadata:
-            metric_description = self.result_metadata[0]
+            metric_description = self.result_metadata[0].get("text", "")
+            if isinstance(metric_description, DescriptionFuture):
+                metric_description = metric_description.get_description()
+                self.result_metadata[0]["text"] = metric_description
+
             vbox_children.append(
                 widgets.HTML(
-                    value=self._markdown_description_to_html(
-                        metric_description.get("text", "")
-                    )
+                    value=self._markdown_description_to_html(metric_description)
                 )
             )
 
@@ -297,8 +309,19 @@ class MetricResultWrapper(ResultWrapper):
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
         if hasattr(self, "result_metadata") and self.result_metadata:
+            description = self.result_metadata[0].get("text", "")
+            if isinstance(description, DescriptionFuture):
+                description = description.get_description()
+                self.result_metadata[0]["text"] = description
+
             for metadata in self.result_metadata:
-                tasks.append(update_metadata(metadata["content_id"], metadata["text"]))
+                tasks.append(
+                    update_metadata(
+                        content_id=metadata["content_id"],
+                        text=metadata.get("text", ""),
+                        _json=metadata.get("json"),
+                    )
+                )
 
         await asyncio.gather(*tasks)
 
@@ -339,14 +362,18 @@ class ThresholdTestResultWrapper(ResultWrapper):
         test_title = test_id_to_name(self.test_results.test_name)
         description_html.append(
             f"""
-            <h2>{test_title} {"✅" if self.test_results.passed else "❌"}</h2>
+            <h1>{test_title} {"✅" if self.test_results.passed else "❌"}</h1>
             """
         )
 
         if self.result_metadata:
-            metric_description = self.result_metadata[0]
+            metric_description = self.result_metadata[0].get("text", "")
+            if isinstance(metric_description, DescriptionFuture):
+                metric_description = metric_description.get_description()
+                self.result_metadata[0]["text"] = metric_description
+
             description_html.append(
-                self._markdown_description_to_html(metric_description.get("text", ""))
+                self._markdown_description_to_html(metric_description)
             )
 
         description_html.append(
@@ -375,6 +402,11 @@ class ThresholdTestResultWrapper(ResultWrapper):
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
         if hasattr(self, "result_metadata") and self.result_metadata:
+            description = self.result_metadata[0].get("text", "")
+            if isinstance(description, DescriptionFuture):
+                description = description.get_description()
+                self.result_metadata[0]["text"] = description
+
             for metadata in self.result_metadata:
                 tasks.append(update_metadata(metadata["content_id"], metadata["text"]))
 

validmind/vm_models/test/test.py

@@ -6,10 +6,12 @@
 
 from abc import abstractmethod
 from dataclasses import dataclass
+from inspect import getdoc
 from typing import ClassVar, List, TypedDict
 from uuid import uuid4
 
 from ..test_context import TestUtils
+from .result_wrapper import ResultWrapper
 
 
 class TestMetadata(TypedDict):
@@ -35,7 +37,7 @@ class Test(TestUtils):
     _ref_id: str = None  # unique identifier (populated at init)
     _section_id: str = None  # which section of template this test belongs to
     test_id: str = None  # populated when loading tests from suites
-    result: object = None  # type should be overridden by parent classes
+    result: ResultWrapper = None  # type should be overridden by parent classes
 
     params: dict = None  # populated by test suite from user-passed config
 
@@ -65,7 +67,7 @@
         Return the test description. May be overridden by subclasses. Defaults
         to returning the class' docstring
         """
-        return self.__doc__.strip()
+        return getdoc(self).strip()
 
     @abstractmethod
     def summary(self, *args, **kwargs):
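
The switch from `self.__doc__` to `inspect.getdoc(self)` matters because test descriptions are rendered as Markdown: `getdoc` normalizes the leading indentation that `__doc__` preserves (the dedenting previously handled by the now-dropped `clean_docstring` import in these modules). A small illustration with a hypothetical class, not from the package:

    from inspect import getdoc

    class Example:
        """First line.

        Indented continuation that would otherwise render as a Markdown code block.
        """

    print(Example().__doc__)   # continuation lines keep their leading indentation
    print(getdoc(Example()))   # cleandoc-normalized: common indentation stripped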

validmind/vm_models/test/threshold_test.py

@@ -8,10 +8,11 @@ Test (as test_results) but we'll refer to it as a ThresholdTest to
 avoid confusion with the "tests" in the general data science/modeling sense.
 """
 
+import os
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...utils import clean_docstring
+from ...ai import generate_description
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
@@ -76,25 +77,34 @@ class ThresholdTest(Test):
         Returns:
             TestSuiteResult: The test suite result object
         """
-        # Rename to self.result
-        # At a minimum, send the test description
-        result_metadata = [
-            {
-                "content_id": f"test_description:{self.test_id}",
-                "text": clean_docstring(self.description()),
-            }
-        ]
-
         result_summary = self.summary(test_results_list, passed)
 
+        if (
+            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
+            == "true"
+        ):
+            revision_name = "Generated by ValidMind AI"
+            description = generate_description(
+                test_name=self.test_id,
+                test_description=self.description().splitlines()[0],
+                test_results=[result.serialize() for result in test_results_list],
+                test_summary=result_summary.serialize(),
+                figures=figures,
+            )
+        else:
+            revision_name = "Default Description"
+            description = self.description()
+
+        description_metadata = {
+            "content_id": f"test_description:{self.test_id}::{revision_name}",
+            "text": description,
+        }
+
         self.result = ThresholdTestResultWrapper(
             result_id=self.test_id,
-            result_metadata=result_metadata,
+            result_metadata=[description_metadata],
             inputs=self.get_accessed_inputs(),
             test_results=ThresholdTestResults(
-                # test_name=self.name,
-                # Now using the fully qualified test ID as `test_name`.
-                # Ideally the backend is updated to use `test_id` instead of `test_name`.
                 test_name=self.test_id,
                 ref_id=self._ref_id,
                 params=self.params,

validmind/vm_models/test_context.py

@@ -165,6 +165,13 @@ class TestUtils:
 
         return []
 
+    def _get_input_dict(self):
+        """Return a dictionary of all inputs"""
+        if isinstance(self.inputs, InputAccessTrackerProxy):
+            return self.inputs._inputs.__dict__
+
+        return self.inputs.__dict__
+
     def _get_legacy_input(self, key):
         """Retrieve an input from the Test Input or, for backwards compatibility,
         the Test Context

validmind/vm_models/test_suite/runner.py

@@ -126,7 +126,7 @@ class TestSuiteRunner:
             )
 
             try:
-                await test.log()
+                await test.log_async()
             except Exception as e:
                 self.pbar_description.value = "Failed to send result to ValidMind"
                 logger.error(f"Failed to log result: {test.result}")

validmind/vm_models/test_suite/summary.py

@@ -6,10 +6,10 @@ from dataclasses import dataclass
 from typing import List, Optional
 
 import ipywidgets as widgets
+import mistune
 from IPython.display import display
 
 from ...logging import get_logger
-from ...utils import clean_docstring
 from ..test.result_wrapper import FailedResultWrapper
 from .test_suite import TestSuiteSection, TestSuiteTest
 
@@ -36,7 +36,7 @@ class TestSuiteSectionSummary:
         self._build_summary()
 
     def _add_description(self):
-        description = f'<div class="result">{clean_docstring(self.description)}</div>'
+        description = f'<div class="result">{mistune.html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_tests_summary(self):
@@ -101,7 +101,7 @@ class TestSuiteSummary:
         self._widgets.append(widgets.HTML(value=results_link))
 
     def _add_description(self):
-        description = f'<div class="result">{clean_docstring(self.description)}</div>'
+        description = f'<div class="result">{mistune.html(self.description)}</div>'
        self._widgets.append(widgets.HTML(value=description))
 
     def _add_sections_summary(self):
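
Descriptions throughout the result and summary widgets are now converted to HTML with `mistune` rather than `clean_docstring` or the `markdown` package (see the matching change in result_wrapper.py above). A quick sketch of the equivalent call, with an illustrative input string:

    import mistune

    description = "## Confusion Matrix\n\nShows **per-class** error counts."

    # mistune's bundled `html` helper stands in for
    # markdown.markdown(description, extensions=["markdown.extensions.tables"])
    print(mistune.html(description))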

validmind/vm_models/test_suite/test.py

@@ -151,7 +151,7 @@ class TestSuiteTest:
 
         self.result = self._test_instance.result
 
-    async def log(self):
+    async def log_async(self):
         """Log the result for this test to ValidMind"""
         if not self.result:
             raise ValueError("Cannot log test result before running the test")

validmind/vm_models/test_suite/test_suite.py

@@ -7,6 +7,7 @@ Base class for test suites and test suite sections
 """
 
 from dataclasses import dataclass
+from inspect import getdoc
 from typing import ClassVar, List, Optional, Union
 
 from ...logging import get_logger
@@ -154,7 +155,7 @@ class TestSuite:
 
     @property
     def description(self):
-        return self.__doc__
+        return getdoc(self).strip()
 
     @property
     def title(self):
  def title(self):