validmind 2.1.1__py3-none-any.whl → 2.2.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (113)
  1. validmind/__version__.py +1 -1
  2. validmind/ai.py +72 -49
  3. validmind/api_client.py +42 -16
  4. validmind/client.py +68 -25
  5. validmind/datasets/llm/rag/__init__.py +11 -0
  6. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +30 -0
  7. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +30 -0
  8. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +53 -0
  9. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +53 -0
  10. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +53 -0
  11. validmind/datasets/llm/rag/rfp.py +41 -0
  12. validmind/errors.py +1 -1
  13. validmind/html_templates/__init__.py +0 -0
  14. validmind/html_templates/content_blocks.py +89 -14
  15. validmind/models/__init__.py +7 -4
  16. validmind/models/foundation.py +8 -34
  17. validmind/models/function.py +51 -0
  18. validmind/models/huggingface.py +16 -46
  19. validmind/models/metadata.py +42 -0
  20. validmind/models/pipeline.py +66 -0
  21. validmind/models/pytorch.py +8 -42
  22. validmind/models/r_model.py +33 -82
  23. validmind/models/sklearn.py +39 -38
  24. validmind/template.py +8 -26
  25. validmind/tests/__init__.py +43 -20
  26. validmind/tests/data_validation/ANOVAOneWayTable.py +1 -1
  27. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  28. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  29. validmind/tests/data_validation/Duplicates.py +1 -1
  30. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  31. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  32. validmind/tests/data_validation/TargetRateBarPlots.py +1 -1
  33. validmind/tests/data_validation/nlp/LanguageDetection.py +59 -0
  34. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +48 -0
  35. validmind/tests/data_validation/nlp/Punctuations.py +11 -12
  36. validmind/tests/data_validation/nlp/Sentiment.py +57 -0
  37. validmind/tests/data_validation/nlp/Toxicity.py +45 -0
  38. validmind/tests/decorator.py +12 -7
  39. validmind/tests/model_validation/BertScore.py +100 -98
  40. validmind/tests/model_validation/BleuScore.py +93 -64
  41. validmind/tests/model_validation/ContextualRecall.py +74 -91
  42. validmind/tests/model_validation/MeteorScore.py +86 -74
  43. validmind/tests/model_validation/RegardScore.py +103 -121
  44. validmind/tests/model_validation/RougeScore.py +118 -0
  45. validmind/tests/model_validation/TokenDisparity.py +84 -121
  46. validmind/tests/model_validation/ToxicityScore.py +109 -123
  47. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +96 -0
  48. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +71 -0
  49. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +92 -0
  50. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +69 -0
  51. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +78 -0
  52. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +35 -23
  53. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +3 -0
  54. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +7 -1
  55. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +3 -0
  56. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +3 -0
  57. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +99 -0
  58. validmind/tests/model_validation/ragas/AnswerCorrectness.py +131 -0
  59. validmind/tests/model_validation/ragas/AnswerRelevance.py +134 -0
  60. validmind/tests/model_validation/ragas/AnswerSimilarity.py +119 -0
  61. validmind/tests/model_validation/ragas/AspectCritique.py +167 -0
  62. validmind/tests/model_validation/ragas/ContextEntityRecall.py +133 -0
  63. validmind/tests/model_validation/ragas/ContextPrecision.py +123 -0
  64. validmind/tests/model_validation/ragas/ContextRecall.py +123 -0
  65. validmind/tests/model_validation/ragas/ContextRelevancy.py +114 -0
  66. validmind/tests/model_validation/ragas/Faithfulness.py +119 -0
  67. validmind/tests/model_validation/ragas/utils.py +66 -0
  68. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -7
  69. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -9
  70. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -10
  71. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +3 -2
  72. validmind/tests/model_validation/sklearn/ROCCurve.py +2 -1
  73. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  74. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -3
  75. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -11
  76. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +3 -4
  77. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +1 -1
  78. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +1 -1
  79. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  80. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  81. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  82. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +1 -1
  83. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  84. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +5 -6
  85. validmind/unit_metrics/__init__.py +26 -49
  86. validmind/unit_metrics/composite.py +13 -7
  87. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +1 -1
  88. validmind/utils.py +99 -6
  89. validmind/vm_models/__init__.py +1 -1
  90. validmind/vm_models/dataset/__init__.py +7 -0
  91. validmind/vm_models/dataset/dataset.py +560 -0
  92. validmind/vm_models/dataset/utils.py +146 -0
  93. validmind/vm_models/model.py +97 -72
  94. validmind/vm_models/test/metric.py +9 -24
  95. validmind/vm_models/test/result_wrapper.py +124 -28
  96. validmind/vm_models/test/threshold_test.py +10 -28
  97. validmind/vm_models/test_context.py +1 -1
  98. validmind/vm_models/test_suite/summary.py +3 -4
  99. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/METADATA +5 -3
  100. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/RECORD +103 -78
  101. validmind/models/catboost.py +0 -33
  102. validmind/models/statsmodels.py +0 -50
  103. validmind/models/xgboost.py +0 -30
  104. validmind/tests/model_validation/BertScoreAggregate.py +0 -90
  105. validmind/tests/model_validation/RegardHistogram.py +0 -148
  106. validmind/tests/model_validation/RougeMetrics.py +0 -147
  107. validmind/tests/model_validation/RougeMetricsAggregate.py +0 -133
  108. validmind/tests/model_validation/SelfCheckNLIScore.py +0 -112
  109. validmind/tests/model_validation/ToxicityHistogram.py +0 -136
  110. validmind/vm_models/dataset.py +0 -1303
  111. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/LICENSE +0 -0
  112. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/WHEEL +0 -0
  113. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/entry_points.txt +0 -0
validmind/vm_models/model.py

@@ -7,9 +7,11 @@ Model class wrapper module
 """
 import importlib
 import inspect
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
+from validmind.errors import MissingOrInvalidModelPredictFnError
+
 SUPPORTED_LIBRARIES = {
     "catboost": "CatBoostModel",
     "xgboost": "XGBoostModel",
@@ -17,6 +19,8 @@ SUPPORTED_LIBRARIES = {
     "statsmodels": "StatsModelsModel",
     "torch": "PyTorchModel",
     "transformers": "HFModel",
+    "function": "FunctionModel",
+    "pipeline": "PipelineModel",
     "custom": "SKlearnModel",
 }
 
@@ -32,6 +36,23 @@ R_MODEL_METHODS = [
 ]
 
 
+class ModelPipeline:
+    """Helper class for chaining models together
+
+    This shouldn't be used directly, it just gets used when chaining models with the
+    `|` operator since you can't use a list directly - you must use a type that
+    overloads the `|` operator.
+    """
+
+    def __init__(self, models):
+        self.models = models
+
+    def __or__(self, other):
+        self.models.append(other)
+
+        return self
+
+
 @dataclass
 class ModelAttributes:
     """
@@ -41,51 +62,67 @@ class ModelAttributes:
     architecture: str = None
     framework: str = None
     framework_version: str = None
+    language: str = None
 
+    @classmethod
+    def from_dict(cls, data):
+        """
+        Creates a ModelAttributes instance from a dictionary
+        """
+        return cls(
+            architecture=data.get("architecture"),
+            framework=data.get("framework"),
+            framework_version=data.get("framework_version"),
+            language=data.get("language"),
+        )
 
-class VMModel:
+
+class VMModel(ABC):
     """
     An base class that wraps a trained model instance and its associated data.
 
     Attributes:
-        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
         model (object, optional): The trained model instance. Defaults to None.
-        device_type(str, optional) The device where model is trained
+        input_id (str, optional): The input ID for the model. Defaults to None.
+        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
+        name (str, optional): The name of the model. Defaults to the class name.
     """
 
-    input_id: str = None
-
     def __init__(
         self,
         input_id: str = None,
        model: object = None,
         attributes: ModelAttributes = None,
+        name: str = None,
+        **kwargs,
    ):
-        self._model = model
-        self._input_id = input_id
-        self._attributes = attributes
+        self.model = model
+        self.input_id = input_id
 
-        # The device where model is trained
-        self._device_type = None
+        self.language = "Python"
+        self.library = self.__class__.__name__
+        self.library_version = "N/A"
+        self.class_ = self.__class__.__name__
 
-    @property
-    def attributes(self):
-        return self._attributes
+        self.name = name or self.__class__.__name__
 
-    @property
-    def input_id(self):
-        return self._input_id
+        self.attributes = attributes
 
-    @property
-    def model(self):
-        return self._model
+        # set any additional attributes passed in (likely for subclasses)
+        for key, value in kwargs.items():
+            setattr(self, key, value)
 
-    @property
-    def device_type(self):
-        """
-        The device where model is trained
-        """
-        return self._device_type
+        self.__post_init__()
+
+    def __post_init__(self):
+        """Allows child classes to add their own post-init logic"""
+        pass
+
+    def __or__(self, other):
+        if not isinstance(other, VMModel):
+            raise ValueError("Can only chain VMModel objects")
+
+        return ModelPipeline([self, other])
 
     def serialize(self):
         """
@@ -95,13 +132,11 @@ class VMModel:
             "attributes": self.attributes.__dict__,
         }
 
-    @abstractmethod
     def predict_proba(self, *args, **kwargs):
-        """
-        Predict probability for the model.
-        This is a wrapper around the model's if available
-        """
-        pass
+        """Predict probabilties - must be implemented by subclass if needed"""
+        raise MissingOrInvalidModelPredictFnError(
+            "`predict_proba()` method not implemented for this model"
+        )
 
     @abstractmethod
     def predict(self, *args, **kwargs):
@@ -110,42 +145,6 @@
         """
         pass
 
-    @abstractmethod
-    def model_language(self, *args, **kwargs):
-        """
-        Programming language used to train the model. Assume Python if this
-        method is not implemented
-        """
-        pass
-
-    @abstractmethod
-    def model_library(self, *args, **kwargs):
-        """
-        Model framework library
-        """
-        pass
-
-    @abstractmethod
-    def model_library_version(self, *args, **kwargs):
-        """
-        Model framework library version
-        """
-        pass
-
-    @abstractmethod
-    def model_class(self, *args, **kwargs):
-        """
-        Predict method for the model. This is a wrapper around the model's
-        """
-        pass
-
-    @abstractmethod
-    def model_name(self, *args, **kwargs):
-        """
-        Model name
-        """
-        pass
-
 
 def has_method_with_arguments(cls, method_name, n_args):
     if not hasattr(cls, method_name):
@@ -195,11 +194,17 @@ def model_module(model):
     return module
 
 
-def get_model_class(model):
-    model_class_name = SUPPORTED_LIBRARIES.get(model_module(model), None)
+def get_model_class(model, predict_fn=None):
+    # TODO: more consistent way to determine this?!
+    if predict_fn is not None:
+        model_class_name = SUPPORTED_LIBRARIES["function"]
+    elif isinstance(model, ModelPipeline):
+        model_class_name = SUPPORTED_LIBRARIES["pipeline"]
+    else:
+        model_class_name = SUPPORTED_LIBRARIES.get(model_module(model), None)
 
-    if model_class_name is None:
-        raise Exception("Model library not supported")
+    if not model_class_name:
+        return None
 
     model_class = getattr(
         importlib.import_module("validmind.models"),
@@ -207,3 +212,23 @@ def get_model_class(model):
     )
 
     return model_class
+
+
+def is_model_metadata(model):
+    """
+    Checks if the model is a dictionary containing metadata about a model.
+    We want to check if the metadata dictionary contains at least the following keys:
+
+    - architecture
+    - language
+    """
+    if not isinstance(model, dict):
+        return False
+
+    if "architecture" not in model:
+        return False
+
+    if "language" not in model:
+        return False
+
+    return True
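
`get_model_class` now resolves in three steps (an explicit `predict_fn` wins, then `ModelPipeline` instances, then the module-based lookup, which returns `None` for unsupported libraries instead of raising), and `is_model_metadata` is a plain shape check on dictionaries. A small sketch exercising both, assuming the import path `validmind.vm_models.model` from the file list above:

```python
from validmind.vm_models.model import (
    ModelPipeline,
    get_model_class,
    is_model_metadata,
)

# a predict_fn short-circuits detection to the "function" entry
print(get_model_class(model=None, predict_fn=lambda x: x).__name__)  # FunctionModel

# a ModelPipeline maps to the "pipeline" entry
print(get_model_class(model=ModelPipeline(models=[])).__name__)  # PipelineModel

# metadata dicts need at least "architecture" and "language"
assert is_model_metadata({"architecture": "GPT-4", "language": "Python"})
assert not is_model_metadata({"architecture": "GPT-4"})  # missing "language"
assert not is_model_metadata("gpt-4")  # not a dict
```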
validmind/vm_models/test/metric.py

@@ -6,15 +6,14 @@
 Class for storing ValidMind metric objects and associated
 data for display and reporting purposes
 """
-import os
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
-from ...ai import generate_description
 from ...errors import MissingCacheResultsArgumentsError
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -83,30 +82,16 @@ class Metric(Test):
             summary=self.summary(metric_value),
         )
 
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=metric.serialize()["value"],
-                test_summary=metric.serialize()["summary"],
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-
-        description_metadata = {
-            "content_id": f"metric_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
-
         self.result = MetricResultWrapper(
             result_id=self.test_id,
-            result_metadata=[description_metadata],
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=metric.serialize()["summary"],
+                    figures=figures,
+                )
+            ],
             metric=metric,
             figures=figures,
             inputs=self.get_accessed_inputs(),
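
The removed branching above (and its twin in threshold_test.py below) moved into `validmind.utils.get_description_metadata`. Its body is not shown in this diff, but the call sites fix its keyword arguments, so a plausible reconstruction from the deleted code looks like the sketch below; the internals are assumptions, not the package's actual implementation:

```python
import os

# generate_description comes from validmind/ai.py (also changed in this release)
from validmind.ai import generate_description


def get_description_metadata(test_id, default_description, summary=None, figures=None):
    """Plausible shape of the shared helper, reconstructed from the removed code.

    Note: the removed metric code used a "metric_description:" content_id prefix
    while the threshold-test code used "test_description:"; how the real helper
    distinguishes the two is not visible in this diff.
    """
    if os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower() == "true":
        revision_name = "Generated by ValidMind AI"
        description = generate_description(
            test_name=test_id,
            test_description=default_description.splitlines()[0],
            test_summary=summary,
            figures=figures,
        )
    else:
        revision_name = "Default Description"
        description = default_description

    return {
        "content_id": f"metric_description:{test_id}::{revision_name}",
        "text": description,
    }
```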
validmind/vm_models/test/result_wrapper.py

@@ -12,20 +12,23 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union
 
-import ipywidgets as widgets
-import mistune
 import pandas as pd
-from IPython.display import display
+from ipywidgets import HTML, GridBox, Layout, VBox
 
 from ... import api_client
 from ...ai import DescriptionFuture
-from ...utils import NumpyEncoder, run_async, test_id_to_name
+from ...input_registry import input_registry
+from ...logging import get_logger
+from ...utils import NumpyEncoder, display, md_to_html, run_async, test_id_to_name
+from ..dataset import VMDataset
 from ..figure import Figure
 from .metric_result import MetricResult
 from .output_template import OutputTemplate
 from .result_summary import ResultSummary
 from .threshold_test_result import ThresholdTestResults
 
+logger = get_logger(__name__)
+
 
 async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
     """
@@ -64,9 +67,9 @@ def plot_figures(figures: List[Figure]) -> None:
     plots = [figure.to_widget() for figure in figures]
 
     num_columns = 2 if len(figures) > 1 else 1
-    return widgets.GridBox(
+    return GridBox(
         plots,
-        layout=widgets.Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
+        layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
     )
 
 
@@ -103,7 +106,7 @@ class ResultWrapper(ABC):
         """
         Convert a markdown string to html
         """
-        return mistune.html(description)
+        return md_to_html(description)
 
     def _summary_tables_to_widget(self, summary: ResultSummary):
         """
@@ -148,10 +151,59 @@ class ResultWrapper(ABC):
                 )  # table.data is an orient=records dump
 
             if table.metadata and table.metadata.title:
-                tables.append(widgets.HTML(value=f"<h3>{table.metadata.title}</h3>"))
-            tables.append(widgets.HTML(value=summary_table))
+                tables.append(HTML(value=f"<h3>{table.metadata.title}</h3>"))
+            tables.append(HTML(value=summary_table))
         return tables
 
+    def _validate_section_id_for_block(self, section_id: str, position: int = None):
+        """
+        Validate the section_id exits on the template before logging. We validate
+        if the section exists and if the user provided position is within the bounds
+        of the section. When the position is None, we assume it goes to the end of the section.
+        """
+        if section_id is None:
+            return
+
+        api_client.reload()
+        found = False
+        client_config = api_client.client_config
+
+        for section in client_config.documentation_template["sections"]:
+            if section["id"] == section_id:
+                found = True
+                break
+
+        if not found:
+            raise ValueError(
+                f"Section with id {section_id} not found in the model's document"
+            )
+
+        # Check if the block already exists in the section
+        block_definition = {
+            "content_id": self.result_id,
+            "content_type": (
+                "metric" if isinstance(self, MetricResultWrapper) else "test"
+            ),
+        }
+        blocks = section.get("contents", [])
+        for block in blocks:
+            if (
+                block["content_id"] == block_definition["content_id"]
+                and block["content_type"] == block_definition["content_type"]
+            ):
+                logger.info(
+                    f"Test driven block with content_id {block_definition['content_id']} already exists in the document's section"
+                )
+                return
+
+        # Validate that the position is within the bounds of the section
+        if position is not None:
+            num_blocks = len(blocks)
+            if position < 0 or position > num_blocks:
+                raise ValueError(
+                    f"Invalid position {position}. Must be between 0 and {num_blocks}"
+                )
+
     def show(self):
         """Display the result... May be overridden by subclasses"""
         display(self.to_widget())
@@ -161,9 +213,11 @@ class ResultWrapper(ABC):
         """Log the result... Must be overridden by subclasses"""
         raise NotImplementedError
 
-    def log(self):
+    def log(self, section_id: str = None, position: int = None):
         """Log the result... May be overridden by subclasses"""
-        run_async(self.log_async)
+
+        self._validate_section_id_for_block(section_id, position)
+        run_async(self.log_async, section_id=section_id, position=position)
 
 
 @dataclass
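
With `_validate_section_id_for_block` in place, `log()` can now target a specific section of the model's documentation. A usage sketch; the test ID is one from this package, while `vm_dataset` and the section id `"data_preparation"` are placeholders:

```python
import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.Duplicates",
    inputs={"dataset": vm_dataset},  # placeholder: a previously initialized VMDataset
)

# section_id must exist in the documentation template, and position (if given)
# must fall within the section's current block count, else ValueError is raised
result.log(section_id="data_preparation", position=0)
```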
@@ -180,9 +234,7 @@ class FailedResultWrapper(ResultWrapper):
         return f'FailedResult(result_id="{self.result_id}")'
 
     def to_widget(self):
-        return widgets.HTML(
-            value=f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>"
-        )
+        return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
 
     async def log_async(self):
         pass
@@ -216,7 +268,7 @@ class MetricResultWrapper(ResultWrapper):
             return ""
 
         vbox_children = [
-            widgets.HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
+            HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
         ]
 
         if self.result_metadata:
@@ -226,9 +278,7 @@ class MetricResultWrapper(ResultWrapper):
                 self.result_metadata[0]["text"] = metric_description
 
             vbox_children.append(
-                widgets.HTML(
-                    value=self._markdown_description_to_html(metric_description)
-                )
+                HTML(value=self._markdown_description_to_html(metric_description))
             )
 
         if self.metric:
@@ -236,18 +286,18 @@ class MetricResultWrapper(ResultWrapper):
                 rendered_output = OutputTemplate(self.output_template).render(
                     value=self.metric.value
                 )
-                vbox_children.append(widgets.HTML(rendered_output))
+                vbox_children.append(HTML(rendered_output))
             elif self.metric.summary:
                 tables = self._summary_tables_to_widget(self.metric.summary)
                 vbox_children.extend(tables)
 
         if self.figures:
-            vbox_children.append(widgets.HTML(value="<h3>Plots</h3>"))
+            vbox_children.append(HTML(value="<h3>Plots</h3>"))
             plot_widgets = plot_figures(self.figures)
             vbox_children.append(plot_widgets)
 
         vbox_children.append(
-            widgets.HTML(
+            HTML(
                 value="""
                 <style>
                     .metric-result {
@@ -293,21 +343,63 @@ class MetricResultWrapper(ResultWrapper):
             )
         )
 
-        return widgets.VBox(vbox_children)
+        return VBox(vbox_children)
 
-    async def log_async(self):
+    def _get_filtered_summary(self):
+        """Check if the metric summary has columns from input datasets"""
+        dataset_columns = set()
+
+        for input_id in self.inputs:
+            input_obj = input_registry.get(input_id)
+            if isinstance(input_obj, VMDataset):
+                dataset_columns.update(input_obj.columns)
+
+        for table in [*self.metric.summary.results]:
+            columns = set()
+
+            if isinstance(table.data, pd.DataFrame):
+                columns.update(table.data.columns)
+            elif isinstance(table.data, list):
+                columns.update(table.data[0].keys())
+            else:
+                raise ValueError("Invalid data type in summary table")
+
+            if bool(columns.intersection(dataset_columns)):
+                logger.warning(
+                    "Sensitive data in metric summary table. Not logging to API automatically."
+                    " Pass `unsafe=True` to result.log() method to override manually."
+                )
+                logger.warning(
+                    f"The following columns are present in the table: {columns}"
+                    f" and also present in the dataset: {dataset_columns}"
+                )
+
+                self.metric.summary.results.remove(table)
+
+        return self.metric.summary
+
+    async def log_async(
+        self, section_id: str = None, position: int = None, unsafe=False
+    ):
         tasks = []  # collect tasks to run in parallel (async)
 
         if self.metric:
+            if self.metric.summary and not unsafe:
+                self.metric.summary = self._get_filtered_summary()
+
             tasks.append(
                 api_client.log_metrics(
                     metrics=[self.metric],
                     inputs=self.inputs,
                     output_template=self.output_template,
+                    section_id=section_id,
+                    position=position,
                 )
             )
+
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
+
         if hasattr(self, "result_metadata") and self.result_metadata:
             description = self.result_metadata[0].get("text", "")
             if isinstance(description, DescriptionFuture):
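
The new `_get_filtered_summary` guard reduces to a set intersection between a summary table's columns and the columns of any `VMDataset` inputs; overlapping tables are dropped (with two warnings) unless `unsafe=True` is passed through to `log_async`. The core of the check, with illustrative literals rather than package code:

```python
# illustrative values, not from the package
dataset_columns = {"age", "income", "default"}  # gathered from VMDataset inputs
table_columns = {"income", "count"}             # DataFrame columns or record keys

overlap = table_columns & dataset_columns
if overlap:
    # the real method warns and removes the table from metric.summary.results
    print(f"summary table withheld; overlapping columns: {overlap}")
```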
@@ -383,21 +475,25 @@ class ThresholdTestResultWrapper(ResultWrapper):
             """
         )
 
-        vbox_children.append(widgets.HTML(value="".join(description_html)))
+        vbox_children.append(HTML(value="".join(description_html)))
 
         if self.test_results.summary:
             tables = self._summary_tables_to_widget(self.test_results.summary)
             vbox_children.extend(tables)
 
         if self.figures:
-            vbox_children.append(widgets.HTML(value="<h3>Plots</h3>"))
+            vbox_children.append(HTML(value="<h3>Plots</h3>"))
             plot_widgets = plot_figures(self.figures)
             vbox_children.append(plot_widgets)
 
-        return widgets.VBox(vbox_children)
+        return VBox(vbox_children)
 
-    async def log_async(self):
-        tasks = [api_client.log_test_result(self.test_results, self.inputs)]
+    async def log_async(self, section_id: str = None, position: int = None):
+        tasks = [
+            api_client.log_test_result(
+                self.test_results, self.inputs, section_id, position
+            )
+        ]
 
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
validmind/vm_models/test/threshold_test.py

@@ -8,11 +8,10 @@ Test (as test_results) but we'll refer to it as a ThresholdTest to
 avoid confusion with the "tests" in the general data science/modeling sense.
 """
 
-import os
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...ai import generate_description
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
@@ -79,30 +78,16 @@ class ThresholdTest(Test):
         """
         result_summary = self.summary(test_results_list, passed)
 
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=[result.serialize() for result in test_results_list],
-                test_summary=result_summary.serialize(),
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-
-        description_metadata = {
-            "content_id": f"test_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
-
         self.result = ThresholdTestResultWrapper(
             result_id=self.test_id,
-            result_metadata=[description_metadata],
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=result_summary.serialize(),
+                    figures=figures,
+                )
+            ],
             inputs=self.get_accessed_inputs(),
             test_results=ThresholdTestResults(
                 test_name=self.test_id,
@@ -112,10 +97,7 @@ class ThresholdTest(Test):
                 results=test_results_list,
                 summary=result_summary,
             ),
+            figures=figures,
         )
 
-        # Allow test results to attach figures to the test suite result
-        if figures:
-            self.result.figures = figures
-
         return self.result
validmind/vm_models/test_context.py

@@ -20,7 +20,7 @@ from validmind.input_registry import input_registry
 
 from ..errors import MissingRequiredTestInputError
 from ..logging import get_logger
-from .dataset import VMDataset
+from .dataset.dataset import VMDataset
 from .model import VMModel
 
 # More human readable context names for error messages
validmind/vm_models/test_suite/summary.py

@@ -6,10 +6,9 @@ from dataclasses import dataclass
 from typing import List, Optional
 
 import ipywidgets as widgets
-import mistune
-from IPython.display import display
 
 from ...logging import get_logger
+from ...utils import display, md_to_html
 from ..test.result_wrapper import FailedResultWrapper
 from .test_suite import TestSuiteSection, TestSuiteTest
 
@@ -36,7 +35,7 @@ class TestSuiteSectionSummary:
         self._build_summary()
 
     def _add_description(self):
-        description = f'<div class="result">{mistune.html(self.description)}</div>'
+        description = f'<div class="result">{md_to_html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_tests_summary(self):
@@ -101,7 +100,7 @@ class TestSuiteSummary:
         self._widgets.append(widgets.HTML(value=results_link))
 
     def _add_description(self):
-        description = f'<div class="result">{mistune.html(self.description)}</div>'
+        description = f'<div class="result">{md_to_html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_sections_summary(self):