validmind 2.1.1__py3-none-any.whl → 2.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai.py +72 -49
- validmind/api_client.py +42 -16
- validmind/client.py +68 -25
- validmind/datasets/llm/rag/__init__.py +11 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +30 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +30 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +53 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +53 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +53 -0
- validmind/datasets/llm/rag/rfp.py +41 -0
- validmind/errors.py +1 -1
- validmind/html_templates/__init__.py +0 -0
- validmind/html_templates/content_blocks.py +89 -14
- validmind/models/__init__.py +7 -4
- validmind/models/foundation.py +8 -34
- validmind/models/function.py +51 -0
- validmind/models/huggingface.py +16 -46
- validmind/models/metadata.py +42 -0
- validmind/models/pipeline.py +66 -0
- validmind/models/pytorch.py +8 -42
- validmind/models/r_model.py +33 -82
- validmind/models/sklearn.py +39 -38
- validmind/template.py +8 -26
- validmind/tests/__init__.py +43 -20
- validmind/tests/data_validation/ANOVAOneWayTable.py +1 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +1 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +59 -0
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +48 -0
- validmind/tests/data_validation/nlp/Punctuations.py +11 -12
- validmind/tests/data_validation/nlp/Sentiment.py +57 -0
- validmind/tests/data_validation/nlp/Toxicity.py +45 -0
- validmind/tests/decorator.py +12 -7
- validmind/tests/model_validation/BertScore.py +100 -98
- validmind/tests/model_validation/BleuScore.py +93 -64
- validmind/tests/model_validation/ContextualRecall.py +74 -91
- validmind/tests/model_validation/MeteorScore.py +86 -74
- validmind/tests/model_validation/RegardScore.py +103 -121
- validmind/tests/model_validation/RougeScore.py +118 -0
- validmind/tests/model_validation/TokenDisparity.py +84 -121
- validmind/tests/model_validation/ToxicityScore.py +109 -123
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +96 -0
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +71 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +92 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +69 -0
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +78 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +35 -23
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +3 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +7 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +3 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +3 -0
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +99 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +131 -0
- validmind/tests/model_validation/ragas/AnswerRelevance.py +134 -0
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +119 -0
- validmind/tests/model_validation/ragas/AspectCritique.py +167 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +133 -0
- validmind/tests/model_validation/ragas/ContextPrecision.py +123 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +123 -0
- validmind/tests/model_validation/ragas/ContextRelevancy.py +114 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +119 -0
- validmind/tests/model_validation/ragas/utils.py +66 -0
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -7
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -9
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -10
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +3 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +2 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -3
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -11
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +3 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +5 -6
- validmind/unit_metrics/__init__.py +26 -49
- validmind/unit_metrics/composite.py +13 -7
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +1 -1
- validmind/utils.py +99 -6
- validmind/vm_models/__init__.py +1 -1
- validmind/vm_models/dataset/__init__.py +7 -0
- validmind/vm_models/dataset/dataset.py +560 -0
- validmind/vm_models/dataset/utils.py +146 -0
- validmind/vm_models/model.py +97 -72
- validmind/vm_models/test/metric.py +9 -24
- validmind/vm_models/test/result_wrapper.py +124 -28
- validmind/vm_models/test/threshold_test.py +10 -28
- validmind/vm_models/test_context.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -4
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/METADATA +5 -3
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/RECORD +103 -78
- validmind/models/catboost.py +0 -33
- validmind/models/statsmodels.py +0 -50
- validmind/models/xgboost.py +0 -30
- validmind/tests/model_validation/BertScoreAggregate.py +0 -90
- validmind/tests/model_validation/RegardHistogram.py +0 -148
- validmind/tests/model_validation/RougeMetrics.py +0 -147
- validmind/tests/model_validation/RougeMetricsAggregate.py +0 -133
- validmind/tests/model_validation/SelfCheckNLIScore.py +0 -112
- validmind/tests/model_validation/ToxicityHistogram.py +0 -136
- validmind/vm_models/dataset.py +0 -1303
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/LICENSE +0 -0
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/WHEEL +0 -0
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/entry_points.txt +0 -0
validmind/vm_models/model.py
CHANGED
@@ -7,9 +7,11 @@ Model class wrapper module
 """
 import importlib
 import inspect
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
+from validmind.errors import MissingOrInvalidModelPredictFnError
+
 SUPPORTED_LIBRARIES = {
     "catboost": "CatBoostModel",
     "xgboost": "XGBoostModel",
@@ -17,6 +19,8 @@ SUPPORTED_LIBRARIES = {
     "statsmodels": "StatsModelsModel",
     "torch": "PyTorchModel",
     "transformers": "HFModel",
+    "function": "FunctionModel",
+    "pipeline": "PipelineModel",
     "custom": "SKlearnModel",
 }
 
@@ -32,6 +36,23 @@ R_MODEL_METHODS = [
 ]
 
 
+class ModelPipeline:
+    """Helper class for chaining models together
+
+    This shouldn't be used directly, it just gets used when chaining models with the
+    `|` operator since you can't use a list directly - you must use a type that
+    overloads the `|` operator.
+    """
+
+    def __init__(self, models):
+        self.models = models
+
+    def __or__(self, other):
+        self.models.append(other)
+
+        return self
+
+
 @dataclass
 class ModelAttributes:
     """
@@ -41,51 +62,67 @@ class ModelAttributes:
     architecture: str = None
     framework: str = None
    framework_version: str = None
+    language: str = None
 
+    @classmethod
+    def from_dict(cls, data):
+        """
+        Creates a ModelAttributes instance from a dictionary
+        """
+        return cls(
+            architecture=data.get("architecture"),
+            framework=data.get("framework"),
+            framework_version=data.get("framework_version"),
+            language=data.get("language"),
+        )
 
-class VMModel:
+
+class VMModel(ABC):
     """
     An base class that wraps a trained model instance and its associated data.
 
     Attributes:
-        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
         model (object, optional): The trained model instance. Defaults to None.
-
+        input_id (str, optional): The input ID for the model. Defaults to None.
+        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
+        name (str, optional): The name of the model. Defaults to the class name.
     """
 
-    input_id: str = None
-
     def __init__(
         self,
         input_id: str = None,
         model: object = None,
         attributes: ModelAttributes = None,
+        name: str = None,
+        **kwargs,
     ):
-        self.
-        self.
-        self._attributes = attributes
+        self.model = model
+        self.input_id = input_id
 
-
-        self.
+        self.language = "Python"
+        self.library = self.__class__.__name__
+        self.library_version = "N/A"
+        self.class_ = self.__class__.__name__
 
-
-    def attributes(self):
-        return self._attributes
+        self.name = name or self.__class__.__name__
 
-
-    def input_id(self):
-        return self._input_id
+        self.attributes = attributes
 
-
-
-
+        # set any additional attributes passed in (likely for subclasses)
+        for key, value in kwargs.items():
+            setattr(self, key, value)
 
-
-
-
-
-
-
+        self.__post_init__()
+
+    def __post_init__(self):
+        """Allows child classes to add their own post-init logic"""
+        pass
+
+    def __or__(self, other):
+        if not isinstance(other, VMModel):
+            raise ValueError("Can only chain VMModel objects")
+
+        return ModelPipeline([self, other])
 
     def serialize(self):
         """
@@ -95,13 +132,11 @@ class VMModel:
             "attributes": self.attributes.__dict__,
         }
 
-    @abstractmethod
     def predict_proba(self, *args, **kwargs):
-        """
-
-
-
-        pass
+        """Predict probabilties - must be implemented by subclass if needed"""
+        raise MissingOrInvalidModelPredictFnError(
+            "`predict_proba()` method not implemented for this model"
+        )
 
     @abstractmethod
     def predict(self, *args, **kwargs):
@@ -110,42 +145,6 @@ class VMModel:
         """
         pass
 
-    @abstractmethod
-    def model_language(self, *args, **kwargs):
-        """
-        Programming language used to train the model. Assume Python if this
-        method is not implemented
-        """
-        pass
-
-    @abstractmethod
-    def model_library(self, *args, **kwargs):
-        """
-        Model framework library
-        """
-        pass
-
-    @abstractmethod
-    def model_library_version(self, *args, **kwargs):
-        """
-        Model framework library version
-        """
-        pass
-
-    @abstractmethod
-    def model_class(self, *args, **kwargs):
-        """
-        Predict method for the model. This is a wrapper around the model's
-        """
-        pass
-
-    @abstractmethod
-    def model_name(self, *args, **kwargs):
-        """
-        Model name
-        """
-        pass
-
 
 def has_method_with_arguments(cls, method_name, n_args):
     if not hasattr(cls, method_name):
@@ -195,11 +194,17 @@ def model_module(model):
     return module
 
 
-def get_model_class(model):
-
+def get_model_class(model, predict_fn=None):
+    # TODO: more consistent way to determine this?!
+    if predict_fn is not None:
+        model_class_name = SUPPORTED_LIBRARIES["function"]
+    elif isinstance(model, ModelPipeline):
+        model_class_name = SUPPORTED_LIBRARIES["pipeline"]
+    else:
+        model_class_name = SUPPORTED_LIBRARIES.get(model_module(model), None)
 
-    if model_class_name
-
+    if not model_class_name:
+        return None
 
     model_class = getattr(
         importlib.import_module("validmind.models"),
@@ -207,3 +212,23 @@ def get_model_class(model):
     )
 
     return model_class
+
+
+def is_model_metadata(model):
+    """
+    Checks if the model is a dictionary containing metadata about a model.
+    We want to check if the metadata dictionary contains at least the following keys:
+
+    - architecture
+    - language
+    """
+    if not isinstance(model, dict):
+        return False
+
+    if "architecture" not in model:
+        return False
+
+    if "language" not in model:
+        return False
+
+    return True
validmind/vm_models/test/metric.py
CHANGED

@@ -6,15 +6,14 @@
 Class for storing ValidMind metric objects and associated
 data for display and reporting purposes
 """
-import os
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
-from ...ai import generate_description
 from ...errors import MissingCacheResultsArgumentsError
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -83,30 +82,16 @@ class Metric(Test):
             summary=self.summary(metric_value),
         )
 
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=metric.serialize()["value"],
-                test_summary=metric.serialize()["summary"],
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-
-        description_metadata = {
-            "content_id": f"metric_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
-
         self.result = MetricResultWrapper(
             result_id=self.test_id,
-            result_metadata=[
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=metric.serialize()["summary"],
+                    figures=figures,
+                )
+            ],
             metric=metric,
             figures=figures,
             inputs=self.get_accessed_inputs(),
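The same refactor appears in threshold_test.py further down: the duplicated env-var and LLM-description branching is collapsed into a single `get_description_metadata` helper in validmind.utils. A hedged sketch of the new call shape, lifted from the hunk above (argument values are illustrative, and the helper's internals are not shown in this diff):

```python
from validmind.utils import get_description_metadata

summary = []  # illustrative: metric.serialize()["summary"] inside Metric.cache_results
figures = []  # illustrative: figures produced by the test run

description_metadata = get_description_metadata(
    test_id="validmind.data_validation.Duplicates",
    default_description="Checks for duplicate rows in the dataset...",
    summary=summary,
    figures=figures,
)
# Judging by the inline code it replaces, the returned dict should have the shape
# {"content_id": "metric_description:<test_id>::<revision_name>", "text": <description>}
```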
validmind/vm_models/test/result_wrapper.py
CHANGED

@@ -12,20 +12,23 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union
 
-import ipywidgets as widgets
-import mistune
 import pandas as pd
-from 
+from ipywidgets import HTML, GridBox, Layout, VBox
 
 from ... import api_client
 from ...ai import DescriptionFuture
-from ...
+from ...input_registry import input_registry
+from ...logging import get_logger
+from ...utils import NumpyEncoder, display, md_to_html, run_async, test_id_to_name
+from ..dataset import VMDataset
 from ..figure import Figure
 from .metric_result import MetricResult
 from .output_template import OutputTemplate
 from .result_summary import ResultSummary
 from .threshold_test_result import ThresholdTestResults
 
+logger = get_logger(__name__)
+
 
 async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
     """
@@ -64,9 +67,9 @@ def plot_figures(figures: List[Figure]) -> None:
     plots = [figure.to_widget() for figure in figures]
 
     num_columns = 2 if len(figures) > 1 else 1
-    return 
+    return GridBox(
         plots,
-        layout=
+        layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
     )
 
 
@@ -103,7 +106,7 @@ class ResultWrapper(ABC):
         """
         Convert a markdown string to html
         """
-        return 
+        return md_to_html(description)
 
     def _summary_tables_to_widget(self, summary: ResultSummary):
         """
@@ -148,10 +151,59 @@ class ResultWrapper(ABC):
         )  # table.data is an orient=records dump
 
         if table.metadata and table.metadata.title:
-            tables.append(
-        tables.append(
+            tables.append(HTML(value=f"<h3>{table.metadata.title}</h3>"))
+        tables.append(HTML(value=summary_table))
         return tables
 
+    def _validate_section_id_for_block(self, section_id: str, position: int = None):
+        """
+        Validate the section_id exits on the template before logging. We validate
+        if the section exists and if the user provided position is within the bounds
+        of the section. When the position is None, we assume it goes to the end of the section.
+        """
+        if section_id is None:
+            return
+
+        api_client.reload()
+        found = False
+        client_config = api_client.client_config
+
+        for section in client_config.documentation_template["sections"]:
+            if section["id"] == section_id:
+                found = True
+                break
+
+        if not found:
+            raise ValueError(
+                f"Section with id {section_id} not found in the model's document"
+            )
+
+        # Check if the block already exists in the section
+        block_definition = {
+            "content_id": self.result_id,
+            "content_type": (
+                "metric" if isinstance(self, MetricResultWrapper) else "test"
+            ),
+        }
+        blocks = section.get("contents", [])
+        for block in blocks:
+            if (
+                block["content_id"] == block_definition["content_id"]
+                and block["content_type"] == block_definition["content_type"]
+            ):
+                logger.info(
+                    f"Test driven block with content_id {block_definition['content_id']} already exists in the document's section"
+                )
+                return
+
+        # Validate that the position is within the bounds of the section
+        if position is not None:
+            num_blocks = len(blocks)
+            if position < 0 or position > num_blocks:
+                raise ValueError(
+                    f"Invalid position {position}. Must be between 0 and {num_blocks}"
+                )
+
     def show(self):
         """Display the result... May be overridden by subclasses"""
         display(self.to_widget())
@@ -161,9 +213,11 @@ class ResultWrapper(ABC):
         """Log the result... Must be overridden by subclasses"""
         raise NotImplementedError
 
-    def log(self):
+    def log(self, section_id: str = None, position: int = None):
         """Log the result... May be overridden by subclasses"""
-        
+
+        self._validate_section_id_for_block(section_id, position)
+        run_async(self.log_async, section_id=section_id, position=position)
 
 
 @dataclass
@@ -180,9 +234,7 @@ class FailedResultWrapper(ResultWrapper):
         return f'FailedResult(result_id="{self.result_id}")'
 
     def to_widget(self):
-        return 
-            value=f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>"
-        )
+        return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
 
     async def log_async(self):
         pass
@@ -216,7 +268,7 @@ class MetricResultWrapper(ResultWrapper):
             return ""
 
         vbox_children = [
-            
+            HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
         ]
 
         if self.result_metadata:
@@ -226,9 +278,7 @@ class MetricResultWrapper(ResultWrapper):
             self.result_metadata[0]["text"] = metric_description
 
             vbox_children.append(
-                
-                value=self._markdown_description_to_html(metric_description)
-                )
+                HTML(value=self._markdown_description_to_html(metric_description))
             )
 
         if self.metric:
@@ -236,18 +286,18 @@ class MetricResultWrapper(ResultWrapper):
             rendered_output = OutputTemplate(self.output_template).render(
                 value=self.metric.value
             )
-            vbox_children.append(
+            vbox_children.append(HTML(rendered_output))
         elif self.metric.summary:
             tables = self._summary_tables_to_widget(self.metric.summary)
             vbox_children.extend(tables)
 
         if self.figures:
-            vbox_children.append(
+            vbox_children.append(HTML(value="<h3>Plots</h3>"))
             plot_widgets = plot_figures(self.figures)
             vbox_children.append(plot_widgets)
 
         vbox_children.append(
-            
+            HTML(
                 value="""
                 <style>
                     .metric-result {
@@ -293,21 +343,63 @@ class MetricResultWrapper(ResultWrapper):
             )
         )
 
-        return 
+        return VBox(vbox_children)
 
-
+    def _get_filtered_summary(self):
+        """Check if the metric summary has columns from input datasets"""
+        dataset_columns = set()
+
+        for input_id in self.inputs:
+            input_obj = input_registry.get(input_id)
+            if isinstance(input_obj, VMDataset):
+                dataset_columns.update(input_obj.columns)
+
+        for table in [*self.metric.summary.results]:
+            columns = set()
+
+            if isinstance(table.data, pd.DataFrame):
+                columns.update(table.data.columns)
+            elif isinstance(table.data, list):
+                columns.update(table.data[0].keys())
+            else:
+                raise ValueError("Invalid data type in summary table")
+
+            if bool(columns.intersection(dataset_columns)):
+                logger.warning(
+                    "Sensitive data in metric summary table. Not logging to API automatically."
+                    " Pass `unsafe=True` to result.log() method to override manually."
+                )
+                logger.warning(
+                    f"The following columns are present in the table: {columns}"
+                    f" and also present in the dataset: {dataset_columns}"
+                )
+
+                self.metric.summary.results.remove(table)
+
+        return self.metric.summary
+
+    async def log_async(
+        self, section_id: str = None, position: int = None, unsafe=False
+    ):
         tasks = []  # collect tasks to run in parallel (async)
 
         if self.metric:
+            if self.metric.summary and not unsafe:
+                self.metric.summary = self._get_filtered_summary()
+
             tasks.append(
                 api_client.log_metrics(
                     metrics=[self.metric],
                     inputs=self.inputs,
                     output_template=self.output_template,
+                    section_id=section_id,
+                    position=position,
                 )
             )
+
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
+
         if hasattr(self, "result_metadata") and self.result_metadata:
             description = self.result_metadata[0].get("text", "")
             if isinstance(description, DescriptionFuture):
@@ -383,21 +475,25 @@ class ThresholdTestResultWrapper(ResultWrapper):
             """
         )
 
-        vbox_children.append(
+        vbox_children.append(HTML(value="".join(description_html)))
 
         if self.test_results.summary:
             tables = self._summary_tables_to_widget(self.test_results.summary)
             vbox_children.extend(tables)
 
         if self.figures:
-            vbox_children.append(
+            vbox_children.append(HTML(value="<h3>Plots</h3>"))
             plot_widgets = plot_figures(self.figures)
             vbox_children.append(plot_widgets)
 
-        return 
+        return VBox(vbox_children)
 
-    async def log_async(self):
-        tasks = [
+    async def log_async(self, section_id: str = None, position: int = None):
+        tasks = [
+            api_client.log_test_result(
+                self.test_results, self.inputs, section_id, position
+            )
+        ]
 
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
validmind/vm_models/test/threshold_test.py
CHANGED

@@ -8,11 +8,10 @@ Test (as test_results) but we'll refer to it as a ThresholdTest to
 avoid confusion with the "tests" in the general data science/modeling sense.
 """
 
-import os
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
@@ -79,30 +78,16 @@ class ThresholdTest(Test):
         """
         result_summary = self.summary(test_results_list, passed)
 
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=[result.serialize() for result in test_results_list],
-                test_summary=result_summary.serialize(),
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-
-        description_metadata = {
-            "content_id": f"test_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
-
         self.result = ThresholdTestResultWrapper(
             result_id=self.test_id,
-            result_metadata=[
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=result_summary.serialize(),
+                    figures=figures,
+                )
+            ],
             inputs=self.get_accessed_inputs(),
             test_results=ThresholdTestResults(
                 test_name=self.test_id,
@@ -112,10 +97,7 @@ class ThresholdTest(Test):
                 results=test_results_list,
                 summary=result_summary,
             ),
+            figures=figures,
         )
 
-        # Allow test results to attach figures to the test suite result
-        if figures:
-            self.result.figures = figures
-
         return self.result
validmind/vm_models/test_context.py
CHANGED

@@ -20,7 +20,7 @@ from validmind.input_registry import input_registry
 
 from ..errors import MissingRequiredTestInputError
 from ..logging import get_logger
-from .dataset import VMDataset
+from .dataset.dataset import VMDataset
 from .model import VMModel
 
 # More human readable context names for error messages
validmind/vm_models/test_suite/summary.py
CHANGED

@@ -6,10 +6,9 @@ from dataclasses import dataclass
 from typing import List, Optional
 
 import ipywidgets as widgets
-import mistune
-from IPython.display import display
 
 from ...logging import get_logger
+from ...utils import display, md_to_html
 from ..test.result_wrapper import FailedResultWrapper
 from .test_suite import TestSuiteSection, TestSuiteTest
@@ -36,7 +35,7 @@ class TestSuiteSectionSummary:
         self._build_summary()
 
     def _add_description(self):
-        description = f'<div class="result">{
+        description = f'<div class="result">{md_to_html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_tests_summary(self):
@@ -101,7 +100,7 @@ class TestSuiteSummary:
         self._widgets.append(widgets.HTML(value=results_link))
 
     def _add_description(self):
-        description = f'<div class="result">{
+        description = f'<div class="result">{md_to_html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_sections_summary(self):
|