validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -3
- validmind/__version__.py +1 -1
- validmind/ai.py +193 -0
- validmind/api_client.py +45 -31
- validmind/client.py +33 -6
- validmind/datasets/classification/customer_churn.py +2 -2
- validmind/datasets/credit_risk/__init__.py +11 -0
- validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club.py +394 -0
- validmind/datasets/nlp/__init__.py +5 -0
- validmind/datasets/nlp/cnn_dailymail.py +98 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
- validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
- validmind/errors.py +11 -1
- validmind/logging.py +9 -2
- validmind/models/huggingface.py +2 -2
- validmind/models/pytorch.py +3 -3
- validmind/models/sklearn.py +4 -4
- validmind/template.py +2 -2
- validmind/test_suites/__init__.py +4 -2
- validmind/tests/__init__.py +130 -45
- validmind/tests/data_validation/DatasetDescription.py +0 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +8 -2
- validmind/tests/data_validation/nlp/StopWords.py +1 -6
- validmind/tests/data_validation/nlp/TextDescription.py +20 -9
- validmind/tests/decorator.py +313 -0
- validmind/tests/model_validation/BertScore.py +1 -1
- validmind/tests/model_validation/BertScoreAggregate.py +1 -1
- validmind/tests/model_validation/BleuScore.py +1 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +110 -0
- validmind/tests/model_validation/MeteorScore.py +92 -0
- validmind/tests/model_validation/RegardHistogram.py +6 -7
- validmind/tests/model_validation/RegardScore.py +4 -6
- validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
- validmind/tests/model_validation/RougeMetrics.py +7 -5
- validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
- validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
- validmind/tests/model_validation/TokenDisparity.py +1 -1
- validmind/tests/model_validation/ToxicityHistogram.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
- validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
- validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
- validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
- validmind/tests/prompt_validation/ai_powered_test.py +2 -0
- validmind/tests/test_providers.py +14 -124
- validmind/unit_metrics/__init__.py +75 -70
- validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
- validmind/unit_metrics/classification/sklearn/F1.py +13 -0
- validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
- validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
- validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
- validmind/unit_metrics/composite.py +228 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
- validmind/unit_metrics/regression/HuberLoss.py +23 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
- validmind/unit_metrics/regression/QuantileLoss.py +15 -0
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
- validmind/utils.py +20 -31
- validmind/vm_models/__init__.py +0 -2
- validmind/vm_models/dataset.py +623 -29
- validmind/vm_models/figure.py +52 -17
- validmind/vm_models/test/metric.py +33 -31
- validmind/vm_models/test/output_template.py +0 -27
- validmind/vm_models/test/result_wrapper.py +68 -36
- validmind/vm_models/test/test.py +4 -2
- validmind/vm_models/test/threshold_test.py +24 -14
- validmind/vm_models/test_context.py +7 -0
- validmind/vm_models/test_suite/runner.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +1 -1
- validmind/vm_models/test_suite/test_suite.py +2 -1
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
- validmind-2.1.0.dist-info/entry_points.txt +3 -0
- validmind/tests/__types__.py +0 -62
- validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
- validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
- validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
- validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
- validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
- validmind/unit_metrics/sklearn/classification/F1.py +0 -22
- validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
- validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
- validmind/vm_models/test/unit_metric.py +0 -88
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/vm_models/figure.py
CHANGED
@@ -21,6 +21,18 @@ from ..errors import InvalidFigureForObjectError, UnsupportedFigureError
 from ..utils import get_full_typename
 
 
+def is_matplotlib_figure(figure) -> bool:
+    return isinstance(figure, matplotlib.figure.Figure)
+
+
+def is_plotly_figure(figure) -> bool:
+    return isinstance(figure, (go.Figure, go.FigureWidget))
+
+
+def is_png_image(figure) -> bool:
+    return isinstance(figure, bytes)
+
+
 @dataclass
 class Figure:
     """
@@ -52,22 +64,10 @@ class Figure:
         if (
             not client_config.running_on_colab
             and self.figure
-            and self.is_plotly_figure()
+            and is_plotly_figure(self.figure)
         ):
             self.figure = go.FigureWidget(self.figure)
 
-    def is_matplotlib_figure(self) -> bool:
-        """
-        Returns True if the figure is a matplotlib figure
-        """
-        return isinstance(self.figure, matplotlib.figure.Figure)
-
-    def is_plotly_figure(self) -> bool:
-        """
-        Returns True if the figure is a plotly figure
-        """
-        return isinstance(self.figure, (go.Figure, go.FigureWidget))
-
     def _get_for_object_type(self):
         """
         Returns the type of the object this figure is for
@@ -91,7 +91,7 @@ class Figure:
         we would render images as-is, but Plotly FigureWidgets don't work well
         on Google Colab when they are combined with ipywidgets.
         """
-        if self.is_matplotlib_figure():
+        if is_matplotlib_figure(self.figure):
             tmpfile = BytesIO()
             self.figure.savefig(tmpfile, format="png")
             encoded = base64.b64encode(tmpfile.getvalue()).decode("utf-8")
@@ -101,7 +101,7 @@ class Figure:
                 """
             )
 
-        elif self.is_plotly_figure():
+        elif is_plotly_figure(self.figure):
             # FigureWidget can be displayed as-is but not on Google Colab. In this case
             # we just return the image representation of the figure.
             if client_config.running_on_colab:
@@ -114,6 +114,15 @@ class Figure:
                 )
             else:
                 return self.figure
+
+        elif is_png_image(self.figure):
+            encoded = base64.b64encode(self.figure).decode("utf-8")
+            return widgets.HTML(
+                value=f"""
+                <img style="width:100%; height: auto;" src="data:image/png;base64,{encoded}"/>
+                """
+            )
+
         else:
             raise UnsupportedFigureError(
                 f"Figure type {type(self.figure)} not supported for plotting"
@@ -129,15 +138,38 @@ class Figure:
             "metadata": json.dumps(self.metadata, allow_nan=False),
         }
 
+    def _get_b64_url(self):
+        """
+        Returns a base64 encoded URL for the figure
+        """
+        if is_matplotlib_figure(self.figure):
+            buffer = BytesIO()
+            self.figure.savefig(buffer, format="png")
+            buffer.seek(0)
+
+            b64_data = base64.b64encode(buffer.read()).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
+        elif is_plotly_figure(self.figure):
+            bytes = self.figure.to_image(format="png")
+            b64_data = base64.b64encode(bytes).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
+        raise UnsupportedFigureError(
+            f"Unrecognized figure type: {get_full_typename(self.figure)}"
+        )
+
     def serialize_files(self):
         """Creates a `requests`-compatible files object to be sent to the API"""
-        if self.is_matplotlib_figure():
+        if is_matplotlib_figure(self.figure):
             buffer = BytesIO()
             self.figure.savefig(buffer, bbox_inches="tight")
             buffer.seek(0)
             return {"image": (f"{self.key}.png", buffer, "image/png")}
 
-        elif self.is_plotly_figure():
+        elif is_plotly_figure(self.figure):
             # When using plotly, we need to use we will produce two files:
             # - a JSON file that will be used to display the figure in the UI
             # - a PNG file that will be used to display the figure in documents
@@ -154,6 +186,9 @@ class Figure:
             ),
         }
 
+        elif is_png_image(self.figure):
+            return {"image": (f"{self.key}.png", self.figure, "image/png")}
+
         raise UnsupportedFigureError(
             f"Unrecognized figure type: {get_full_typename(self.figure)}"
        )
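Net effect of this change: the figure type checks move from Figure methods to module-level helpers, and raw PNG bytes become a third supported figure type alongside matplotlib and Plotly figures (rendered via a base64 data URL and uploaded as-is by serialize_files). A minimal sketch of the helpers, whose signatures are taken from the diff above; importing them from validmind.vm_models.figure is an assumption about the public surface:

    import matplotlib.pyplot as plt
    import plotly.graph_objs as go

    from validmind.vm_models.figure import (
        is_matplotlib_figure,
        is_plotly_figure,
        is_png_image,
    )

    fig, ax = plt.subplots()            # a matplotlib Figure
    assert is_matplotlib_figure(fig)

    plotly_fig = go.FigureWidget()      # go.Figure or go.FigureWidget
    assert is_plotly_figure(plotly_fig)

    png_bytes = b"\x89PNG\r\n\x1a\n"    # any bytes object passes the check
    assert is_png_image(png_bytes)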
validmind/vm_models/test/metric.py
CHANGED
@@ -6,14 +6,15 @@
 Class for storing ValidMind metric objects and associated
 data for display and reporting purposes
 """
+import os
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
+from ...ai import generate_description
 from ...errors import MissingCacheResultsArgumentsError
-from ...utils import clean_docstring
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -74,41 +75,42 @@ class Metric(Test):
                 "Metric must provide a metric value or figures to cache_results"
             )
 
-
-        result_metadata = [
-            {
-                "content_id": f"metric_description:{self.test_id}",
-                "text": clean_docstring(self.description()),
-            }
-        ]
-
-        result_summary = self.summary(metric_value)
-
-        result_wrapper = MetricResultWrapper(
-            result_id=self.test_id,
-            result_metadata=result_metadata,
-            inputs=self.get_accessed_inputs(),
-            output_template=self.output_template,
-        )
-
-        # We can send an empty result to push an empty metric with a summary and plots
-        metric_result_value = metric_value if metric_value is not None else {}
-
-        result_wrapper.metric = MetricResult(
-            # key=self.key,
-            # Now using the fully qualified test ID as `key`.
-            # Ideally the backend is updated to use `test_id` instead of `key`.
+        metric = MetricResult(
             key=self.test_id,
             ref_id=self._ref_id,
-            value=metric_result_value,
+            value=metric_value if metric_value is not None else {},
             value_formatter=self.value_formatter,
-            summary=result_summary,
+            summary=self.summary(metric_value),
         )
 
-        ...
+        if (
+            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
+            == "true"
+        ):
+            revision_name = "Generated by ValidMind AI"
+            description = generate_description(
+                test_name=self.test_id,
+                test_description=self.description().splitlines()[0],
+                test_results=metric.serialize()["value"],
+                test_summary=metric.serialize()["summary"],
+                figures=figures,
+            )
+        else:
+            revision_name = "Default Description"
+            description = self.description()
+
+        description_metadata = {
+            "content_id": f"metric_description:{self.test_id}::{revision_name}",
+            "text": description,
+        }
 
-        self.result = result_wrapper
+        self.result = MetricResultWrapper(
+            result_id=self.test_id,
+            result_metadata=[description_metadata],
+            metric=metric,
+            figures=figures,
+            inputs=self.get_accessed_inputs(),
+            output_template=self.output_template,
+        )
 
         return self.result
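cache_results now produces the result description one of two ways: the docstring-derived default, or an LLM-written narrative when VALIDMIND_LLM_DESCRIPTIONS_ENABLED is set. A sketch of opting in; the environment variable and revision names come from the diff, while the run_test call is typical usage whose test ID and inputs are hypothetical:

    import os

    # opt in before running tests; unset or "false" keeps the default docstring
    os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"] = "true"

    from validmind.tests import run_test

    result = run_test(
        "validmind.model_validation.sklearn.ConfusionMatrix",     # hypothetical
        inputs={"model": my_model, "dataset": my_test_dataset},   # hypothetical
    )
    result.log()  # description stored as revision "Generated by ValidMind AI"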
validmind/vm_models/test/output_template.py
CHANGED
@@ -4,7 +4,6 @@
 
 from datetime import datetime
 
-from bs4 import BeautifulSoup
 from dateutil import parser
 from jinja2 import Environment
 
@@ -54,29 +53,3 @@ class OutputTemplate:
             value=value,
             metric_history=values_history,
         )
-
-def parse_summary_from_html(rendered_template_html):
-    soup = BeautifulSoup(rendered_template_html, "html.parser")
-
-    # find all `<table>` elements
-    tables = soup.find_all("table")
-    tables_data = []
-
-    for table in tables:
-        headers = [cell.text for cell in table.find_all("th")]
-
-        tables_data.append(
-            {
-                "type": "table",
-                "data": [
-                    {
-                        headers[i]: cell.text
-                        for i, cell in enumerate(row.find_all("td"))
-                    }
-                    for row in table.find("tbody").find_all("tr")
-                ],
-                "metadata": {"title": ""},  # TODO: add title
-            }
-        )
-
-    return tables_data
validmind/vm_models/test/result_wrapper.py
CHANGED
@@ -10,14 +10,15 @@ import json
 import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import Dict, List, Optional, Union
 
 import ipywidgets as widgets
-import markdown
+import mistune
 import pandas as pd
 from IPython.display import display
 
 from ... import api_client
+from ...ai import DescriptionFuture
 from ...utils import NumpyEncoder, run_async, test_id_to_name
 from ..figure import Figure
 from .metric_result import MetricResult
@@ -26,24 +27,33 @@ from .result_summary import ResultSummary
 from .threshold_test_result import ThresholdTestResults
 
 
-async def update_metadata(content_id: str, text: str):
+async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
     """
     Update the metadata of a content item. By default we don't
     override the existing metadata, but we can override it by
     setting the VM_OVERRIDE_METADATA environment variable to True
     """
-
-    try:
-        existing_metadata = await api_client.get_metadata(content_id)
-    except Exception:
-        existing_metadata = None  # TODO: handle this better
+    should_update = False
 
-    if ...
+    # check if the env variable is set to force overwriting metadata
+    if os.environ.get("VM_OVERRIDE_METADATA", "false").lower() == "true":
+        should_update = True
+
+    # if not set, check if the content_id is a composite metric def
+    if not should_update and content_id.startswith("composite_metric_def:"):
+        # we always want composite metric definitions to be updated
+        should_update = True
+
+    # if not set, lets check if the metadata already exists
+    if not should_update:
+        try:
+            await api_client.get_metadata(content_id)
+        except Exception:  # TODO: this shouldn't be a catch-all
+            # if the metadata doesn't exist, we should create (update) it
+            should_update = True
+
+    if should_update:
+        await api_client.log_metadata(content_id, text, _json)
 
 
 def plot_figures(figures: List[Figure]) -> None:
@@ -93,8 +103,7 @@ class ResultWrapper(ABC):
         """
         Convert a markdown string to html
         """
-
-        return markdown.markdown(description, extensions=["markdown.extensions.tables"])
+        return mistune.html(description)
 
     def _summary_tables_to_widget(self, summary: ResultSummary):
         """
@@ -111,21 +120,19 @@ class ResultWrapper(ABC):
             [
                 {
                     "selector": "",
-                    "props": [
-                        ("width", "100%"),
-                    ],
+                    "props": [("width", "100%")],
+                },
+                {
+                    "selector": "th",
+                    "props": [("text-align", "left")],
                 },
                 {
                     "selector": "tbody tr:nth-child(even)",
-                    "props": [
-                        ("background-color", "#FFFFFF"),
-                    ],
+                    "props": [("background-color", "#FFFFFF")],
                 },
                 {
                     "selector": "tbody tr:nth-child(odd)",
-                    "props": [
-                        ("background-color", "#F5F5F5"),
-                    ],
+                    "props": [("background-color", "#F5F5F5")],
                 },
                 {
                     "selector": "td, th",
@@ -135,7 +142,8 @@ class ResultWrapper(ABC):
                 ],
             },
         ]
-            )
+            )
+            .set_properties(**{"text-align": "left"})
             .to_html(escape=False)
         )  # table.data is an orient=records dump
 
@@ -155,7 +163,7 @@ class ResultWrapper(ABC):
 
     def log(self):
         """Log the result... May be overridden by subclasses"""
-        ...
+        run_async(self.log_async)
 
 
 @dataclass
@@ -207,15 +215,19 @@ class MetricResultWrapper(ResultWrapper):
         if self.metric and self.metric.key == "dataset_description":
             return ""
 
-        vbox_children = [...]
+        vbox_children = [
+            widgets.HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
+        ]
 
         if self.result_metadata:
-            metric_description = self.result_metadata[0]
+            metric_description = self.result_metadata[0].get("text", "")
+            if isinstance(metric_description, DescriptionFuture):
+                metric_description = metric_description.get_description()
+                self.result_metadata[0]["text"] = metric_description
+
             vbox_children.append(
                 widgets.HTML(
-                    value=self._markdown_description_to_html(
-                        metric_description.get("text", "")
-                    )
+                    value=self._markdown_description_to_html(metric_description)
                 )
             )
 
@@ -297,8 +309,19 @@ class MetricResultWrapper(ResultWrapper):
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
         if hasattr(self, "result_metadata") and self.result_metadata:
+            description = self.result_metadata[0].get("text", "")
+            if isinstance(description, DescriptionFuture):
+                description = description.get_description()
+                self.result_metadata[0]["text"] = description
+
             for metadata in self.result_metadata:
-                tasks.append(update_metadata(metadata["content_id"], metadata["text"]))
+                tasks.append(
+                    update_metadata(
+                        content_id=metadata["content_id"],
+                        text=metadata.get("text", ""),
+                        _json=metadata.get("json"),
+                    )
+                )
 
         await asyncio.gather(*tasks)
 
@@ -339,14 +362,18 @@ class ThresholdTestResultWrapper(ResultWrapper):
         test_title = test_id_to_name(self.test_results.test_name)
         description_html.append(
             f"""
-            <...
+            <h1>{test_title} {"✅" if self.test_results.passed else "❌"}</h1>
             """
         )
 
         if self.result_metadata:
-            metric_description = self.result_metadata[0]
+            metric_description = self.result_metadata[0].get("text", "")
+            if isinstance(metric_description, DescriptionFuture):
+                metric_description = metric_description.get_description()
+                self.result_metadata[0]["text"] = metric_description
+
             description_html.append(
-                self._markdown_description_to_html(metric_description.get("text", ""))
+                self._markdown_description_to_html(metric_description)
             )
 
         description_html.append(
@@ -375,6 +402,11 @@ class ThresholdTestResultWrapper(ResultWrapper):
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
         if hasattr(self, "result_metadata") and self.result_metadata:
+            description = self.result_metadata[0].get("text", "")
+            if isinstance(description, DescriptionFuture):
+                description = description.get_description()
+                self.result_metadata[0]["text"] = description
+
             for metadata in self.result_metadata:
                 tasks.append(update_metadata(metadata["content_id"], metadata["text"]))
 
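Both wrapper classes now tolerate a DescriptionFuture where the description text used to be, resolving it with get_description() only when a result is displayed or logged, so LLM generation can run in the background. The real class lives in the new validmind/ai.py, which is not shown on this page; below is an illustrative stand-in for the pattern only:

    from concurrent.futures import ThreadPoolExecutor


    class DescriptionFuture:  # stand-in, not the shipped class
        def __init__(self, future):
            self._future = future

        def get_description(self):
            # blocks until the background LLM call finishes
            return self._future.result()


    def slow_llm_call():
        return "An LLM-written interpretation of the metric results..."


    executor = ThreadPoolExecutor()
    description = DescriptionFuture(executor.submit(slow_llm_call))

    # ...the test keeps running while the text is generated...
    print(description.get_description())  # resolved lazily, as in _to_widget()/log_async()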
validmind/vm_models/test/test.py
CHANGED
@@ -6,10 +6,12 @@
 
 from abc import abstractmethod
 from dataclasses import dataclass
+from inspect import getdoc
 from typing import ClassVar, List, TypedDict
 from uuid import uuid4
 
 from ..test_context import TestUtils
+from .result_wrapper import ResultWrapper
 
 
 class TestMetadata(TypedDict):
@@ -35,7 +37,7 @@ class Test(TestUtils):
     _ref_id: str = None  # unique identifier (populated at init)
     _section_id: str = None  # which section of template this test belongs to
     test_id: str = None  # populated when loading tests from suites
-    result: ...
+    result: ResultWrapper = None  # type should be overridden by parent classes
 
     params: dict = None  # populated by test suite from user-passed config
 
@@ -65,7 +67,7 @@ class Test(TestUtils):
         Return the test description. May be overridden by subclasses. Defaults
         to returning the class' docstring
         """
-        return self.__doc__
+        return getdoc(self).strip()
 
     @abstractmethod
     def summary(self, *args, **kwargs):
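getdoc() improves on a raw __doc__ lookup: it strips the common leading indentation and falls back to the parent class when a subclass has no docstring of its own, so descriptions render cleanly as markdown. For example:

    from inspect import getdoc


    class MyMetric:
        """
        Checks model accuracy against a threshold.

            Indented detail keeps its relative indentation,
            but the shared leading whitespace is removed.
        """


    print(getdoc(MyMetric).strip())  # dedented; compare MyMetric.__doc__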
validmind/vm_models/test/threshold_test.py
CHANGED
@@ -8,10 +8,11 @@ Test (as test_results) but we'll refer to it as a ThresholdTest to
 avoid confusion with the "tests" in the general data science/modeling sense.
 """
 
+import os
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...utils import clean_docstring
+from ...ai import generate_description
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
@@ -76,25 +77,34 @@ class ThresholdTest(Test):
         Returns:
             TestSuiteResult: The test suite result object
         """
-        # Rename to self.result
-        # At a minimum, send the test description
-        result_metadata = [
-            {
-                "content_id": f"test_description:{self.test_id}",
-                "text": clean_docstring(self.description()),
-            }
-        ]
-
         result_summary = self.summary(test_results_list, passed)
 
+        if (
+            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
+            == "true"
+        ):
+            revision_name = "Generated by ValidMind AI"
+            description = generate_description(
+                test_name=self.test_id,
+                test_description=self.description().splitlines()[0],
+                test_results=[result.serialize() for result in test_results_list],
+                test_summary=result_summary.serialize(),
+                figures=figures,
+            )
+        else:
+            revision_name = "Default Description"
+            description = self.description()
+
+        description_metadata = {
+            "content_id": f"test_description:{self.test_id}::{revision_name}",
+            "text": description,
+        }
+
         self.result = ThresholdTestResultWrapper(
             result_id=self.test_id,
-            result_metadata=result_metadata,
+            result_metadata=[description_metadata],
             inputs=self.get_accessed_inputs(),
             test_results=ThresholdTestResults(
-                # test_name=self.name,
-                # Now using the fully qualified test ID as `test_name`.
-                # Ideally the backend is updated to use `test_id` instead of `test_name`.
                 test_name=self.test_id,
                 ref_id=self._ref_id,
                 params=self.params,
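The same environment-variable gate as in metric.py applies here, and the description's content_id now embeds the revision name, so default and AI-generated texts are tracked as distinct revisions of the same content block. And since update_metadata() above only writes content that does not already exist unless VM_OVERRIDE_METADATA is set, switching modes writes under a new content_id rather than overwriting the old text. Sketched with a hypothetical test ID:

    test_id = "validmind.model_validation.sklearn.MinimumF1Score"  # hypothetical

    default_id = f"test_description:{test_id}::Default Description"
    ai_id = f"test_description:{test_id}::Generated by ValidMind AI"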
validmind/vm_models/test_context.py
CHANGED
@@ -165,6 +165,13 @@ class TestUtils:
 
         return []
 
+    def _get_input_dict(self):
+        """Return a dictionary of all inputs"""
+        if isinstance(self.inputs, InputAccessTrackerProxy):
+            return self.inputs._inputs.__dict__
+
+        return self.inputs.__dict__
+
     def _get_legacy_input(self, key):
         """Retrieve an input from the Test Input or, for backwards compatibility,
         the Test Context
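_get_input_dict() gives callers a plain dict view of a test's inputs whether or not they arrive wrapped in the access-tracking proxy. A runnable stand-in for the unwrap behavior; the class bodies here are assumptions, and only the isinstance branch mirrors the diff:

    class _TestInput:  # stand-in for the real inputs container
        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)


    class InputAccessTrackerProxy:  # stand-in; the real one records input access
        def __init__(self, inputs):
            self._inputs = inputs


    def get_input_dict(inputs):
        if isinstance(inputs, InputAccessTrackerProxy):
            return inputs._inputs.__dict__  # unwrap the tracked inputs
        return inputs.__dict__


    tracked = InputAccessTrackerProxy(_TestInput(dataset="vm_ds", model="vm_model"))
    print(get_input_dict(tracked))  # {'dataset': 'vm_ds', 'model': 'vm_model'}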
validmind/vm_models/test_suite/summary.py
CHANGED
@@ -6,10 +6,10 @@ from dataclasses import dataclass
 from typing import List, Optional
 
 import ipywidgets as widgets
+import mistune
 from IPython.display import display
 
 from ...logging import get_logger
-from ...utils import clean_docstring
 from ..test.result_wrapper import FailedResultWrapper
 from .test_suite import TestSuiteSection, TestSuiteTest
 
@@ -36,7 +36,7 @@ class TestSuiteSectionSummary:
         self._build_summary()
 
     def _add_description(self):
-        description = f'<div class="result">{clean_docstring(self.description)}</div>'
+        description = f'<div class="result">{mistune.html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_tests_summary(self):
@@ -101,7 +101,7 @@ class TestSuiteSummary:
         self._widgets.append(widgets.HTML(value=results_link))
 
     def _add_description(self):
-        description = f'<div class="result">{clean_docstring(self.description)}</div>'
+        description = f'<div class="result">{mistune.html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
 
     def _add_sections_summary(self):
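Markdown rendering switches from the markdown package to mistune, here and in result_wrapper.py above. In mistune 2.x the bundled mistune.html helper enables common plugins, tables included, so the explicit tables extension the old markdown.markdown() call needed goes away:

    import mistune

    text = "**Accuracy** above threshold\n\n| metric | value |\n| --- | --- |\n| acc | 0.91 |"
    print(mistune.html(text))  # HTML with <strong> markup and a <table>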
validmind/vm_models/test_suite/test.py
CHANGED
@@ -151,7 +151,7 @@ class TestSuiteTest:
 
         self.result = self._test_instance.result
 
-    async def log(self):
+    async def log_async(self):
         """Log the result for this test to ValidMind"""
         if not self.result:
             raise ValueError("Cannot log test result before running the test")
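Renaming the coroutine to log_async frees the synchronous log() name, matching result_wrapper.py above where log() now simply calls run_async(self.log_async). A simplified stand-in sketch of that sync-over-async bridge; the real validmind.utils.run_async is not shown in this diff, and a full implementation would also need to handle an already-running event loop (e.g. in notebooks), which asyncio.run alone does not:

    import asyncio


    async def log_async():  # stands in for TestSuiteTest.log_async
        await asyncio.sleep(0)  # e.g. awaiting api_client calls
        print("result logged")


    def run_async(coro_fn, *args):  # stand-in for validmind.utils.run_async
        return asyncio.run(coro_fn(*args))


    run_async(log_async)  # what a synchronous log() entry point does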
validmind/vm_models/test_suite/test_suite.py
CHANGED
@@ -7,6 +7,7 @@ Base class for test suites and test suite sections
 """
 
 from dataclasses import dataclass
+from inspect import getdoc
 from typing import ClassVar, List, Optional, Union
 
 from ...logging import get_logger
@@ -154,7 +155,7 @@ class TestSuite:
 
     @property
     def description(self):
-        return self.__doc__
+        return getdoc(self).strip()
 
     @property
     def title(self):