validmind 2.6.10__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +2 -0
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +20 -4
- validmind/ai/test_result_description/user.jinja +5 -0
- validmind/datasets/credit_risk/lending_club.py +444 -14
- validmind/tests/data_validation/MutualInformation.py +129 -0
- validmind/tests/data_validation/ScoreBandDefaultRates.py +139 -0
- validmind/tests/data_validation/TooManyZeroValues.py +6 -5
- validmind/tests/data_validation/UniqueRows.py +3 -1
- validmind/tests/decorator.py +18 -16
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +116 -0
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +261 -0
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +1 -0
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +144 -56
- validmind/tests/model_validation/sklearn/ModelParameters.py +74 -0
- validmind/tests/model_validation/sklearn/ROCCurve.py +26 -23
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +130 -0
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -6
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -3
- validmind/tests/output.py +10 -1
- validmind/tests/run.py +52 -54
- validmind/utils.py +34 -7
- validmind/vm_models/figure.py +15 -0
- validmind/vm_models/result/__init__.py +2 -2
- validmind/vm_models/result/result.py +136 -23
- {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/METADATA +1 -1
- {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/RECORD +30 -24
- {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/LICENSE +0 -0
- {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/WHEEL +0 -0
- {validmind-2.6.10.dist-info → validmind-2.7.4.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py
ADDED
@@ -0,0 +1,130 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+import plotly.graph_objects as go
+
+from validmind import tags, tasks
+from validmind.vm_models import VMModel, VMDataset
+
+
+@tags("visualization", "credit_risk", "calibration")
+@tasks("classification")
+def ScoreProbabilityAlignment(
+    model: VMModel, dataset: VMDataset, score_column: str = "score", n_bins: int = 10
+):
+    """
+    Analyzes the alignment between credit scores and predicted probabilities.
+
+    ### Purpose
+
+    The Score-Probability Alignment test evaluates how well credit scores align with
+    predicted default probabilities. This helps validate score scaling, identify potential
+    calibration issues, and ensure scores reflect risk appropriately.
+
+    ### Test Mechanism
+
+    The test:
+    1. Groups scores into bins
+    2. Calculates average predicted probability per bin
+    3. Tests monotonicity of relationship
+    4. Analyzes probability distribution within score bands
+
+    ### Signs of High Risk
+
+    - Non-monotonic relationship between scores and probabilities
+    - Large probability variations within score bands
+    - Unexpected probability jumps between adjacent bands
+    - Poor alignment with expected odds-to-score relationship
+    - Inconsistent probability patterns across score ranges
+    - Clustering of probabilities at extreme values
+    - Score bands with similar probability profiles
+    - Unstable probability estimates in key decision bands
+
+    ### Strengths
+
+    - Direct validation of score-to-probability relationship
+    - Identifies potential calibration issues
+    - Supports score band validation
+    - Helps understand model behavior
+    - Useful for policy setting
+    - Visual and numerical results
+    - Easy to interpret
+    - Supports regulatory documentation
+
+    ### Limitations
+
+    - Sensitive to bin selection
+    - Requires sufficient data per bin
+    - May mask within-bin variations
+    - Point-in-time analysis only
+    - Cannot detect all forms of miscalibration
+    - Assumes scores should align with probabilities
+    - May oversimplify complex relationships
+    - Limited to binary outcomes
+    """
+    if score_column not in dataset.df.columns:
+        raise ValueError(f"Score column '{score_column}' not found in dataset")
+
+    # Get predicted probabilities
+    y_prob = dataset.y_prob(model)
+
+    # Create score bins
+    df = dataset.df.copy()
+    df["probability"] = y_prob
+
+    # Create score bins with equal width
+    df["score_bin"] = pd.qcut(df[score_column], n_bins, duplicates="drop")
+
+    # Calculate statistics per bin
+    results = []
+    for bin_name, group in df.groupby("score_bin"):
+        bin_stats = {
+            "Score Range": f"{bin_name.left:.0f}-{bin_name.right:.0f}",
+            "Mean Score": group[score_column].mean(),
+            "Population Count": len(group),
+            "Population (%)": len(group) / len(df) * 100,
+            "Mean Probability (%)": group["probability"].mean() * 100,
+            "Min Probability (%)": group["probability"].min() * 100,
+            "Max Probability (%)": group["probability"].max() * 100,
+            "Probability Std": group["probability"].std() * 100,
+        }
+        results.append(bin_stats)
+
+    results_df = pd.DataFrame(results)
+
+    # Create visualization
+    fig = go.Figure()
+
+    # Add probability range
+    fig.add_trace(
+        go.Scatter(
+            x=results_df["Mean Score"],
+            y=results_df["Mean Probability (%)"],
+            mode="lines+markers",
+            name="Mean Probability",
+            line=dict(color="blue"),
+            error_y=dict(
+                type="data",
+                symmetric=False,
+                array=results_df["Max Probability (%)"]
+                - results_df["Mean Probability (%)"],
+                arrayminus=results_df["Mean Probability (%)"]
+                - results_df["Min Probability (%)"],
+                color="gray",
+            ),
+        )
+    )
+
+    # Update layout
+    fig.update_layout(
+        title="Score-Probability Alignment",
+        xaxis_title="Score",
+        yaxis_title="Default Probability (%)",
+        showlegend=True,
+        template="plotly_white",
+        width=800,
+        height=600,
+    )
+
+    return results_df, fig
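For orientation, here is a minimal sketch of how this new test might be invoked through `run_test` (whose changes appear in `validmind/tests/run.py` below). The test ID is inferred from the file path, and `vm_model` / `vm_dataset` stand in for already-initialized ValidMind model and dataset objects; none of these names are spelled out in this diff.

```python
from validmind.tests import run_test

# `vm_model` and `vm_dataset` are assumed to be pre-initialized VMModel / VMDataset
# objects whose dataset has a "score" column and assigned prediction probabilities.
result = run_test(
    "validmind.model_validation.sklearn.ScoreProbabilityAlignment",  # ID inferred from the file path
    inputs={"model": vm_model, "dataset": vm_dataset},
    params={"score_column": "score", "n_bins": 10},
)
```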
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py
CHANGED
@@ -9,22 +9,21 @@ from matplotlib import cm
 from validmind import tags, tasks


-@tags("visualization", "credit_risk"
+@tags("visualization", "credit_risk")
 @tasks("classification")
 def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabilities"):
     """
-    Visualizes cumulative probabilities of positive and negative classes for both training and testing in
-    regression models.
+    Visualizes cumulative probabilities of positive and negative classes for both training and testing in classification models.

     ### Purpose

     This metric is utilized to evaluate the distribution of predicted probabilities for positive and negative classes
-    in a
+    in a classification model. It provides a visual assessment of the model's behavior by plotting the cumulative
     probabilities for positive and negative classes across both the training and test datasets.

     ### Test Mechanism

-    The
+    The classification model is evaluated by first computing the predicted probabilities for each instance in both
     the training and test datasets, which are then added as a new column in these sets. The cumulative probabilities
     for positive and negative classes are subsequently calculated and sorted in ascending order. Cumulative
     distributions of these probabilities are created for both positive and negative classes across both training and
@@ -51,7 +50,7 @@ def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabil

     ### Limitations

-    - Exclusive to classification tasks and specifically to
+    - Exclusive to classification tasks and specifically to classification models.
     - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
     detection.
     - The method does not give a solitary quantifiable measure of model risk, instead, it offers a visual
validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py
CHANGED
@@ -9,7 +9,7 @@ from matplotlib import cm
 from validmind import tags, tasks


-@tags("visualization", "credit_risk"
+@tags("visualization", "credit_risk")
 @tasks("classification")
 def PredictionProbabilitiesHistogram(
     dataset, model, title="Histogram of Predictive Probabilities"
@@ -22,7 +22,7 @@ def PredictionProbabilitiesHistogram(

     The Prediction Probabilities Histogram test is designed to generate histograms displaying the Probability of
     Default (PD) predictions for both positive and negative classes in training and testing datasets. This helps in
-    evaluating the performance of a
+    evaluating the performance of a classification model.

     ### Test Mechanism

@@ -52,7 +52,6 @@ def PredictionProbabilitiesHistogram(
     ### Limitations

     - Specifically tailored for binary classification scenarios and not suited for multi-class classification tasks.
-    - Mainly applicable to logistic regression models, and may not be effective for other model types.
     - Provides a robust visual representation but lacks a quantifiable measure to assess model performance.
     """

validmind/tests/output.py
CHANGED
@@ -15,7 +15,7 @@ from validmind.vm_models.figure import (
     is_plotly_figure,
     is_png_image,
 )
-from validmind.vm_models.result import ResultTable, TestResult
+from validmind.vm_models.result import RawData, ResultTable, TestResult


 class OutputHandler(ABC):
@@ -103,6 +103,14 @@ class TableOutputHandler(OutputHandler):
         result.add_table(ResultTable(data=table_data, title=table_name or None))


+class RawDataOutputHandler(OutputHandler):
+    def can_handle(self, item: Any) -> bool:
+        return isinstance(item, RawData)
+
+    def process(self, item: Any, result: TestResult) -> None:
+        result.raw_data = item
+
+
 def process_output(item: Any, result: TestResult) -> None:
     """Process a single test output item and update the TestResult."""
     handlers = [
@@ -110,6 +118,7 @@ def process_output(item: Any, result: TestResult) -> None:
         MetricOutputHandler(),
         FigureOutputHandler(),
         TableOutputHandler(),
+        RawDataOutputHandler(),
     ]

     for handler in handlers:
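The new `RawDataOutputHandler` means a test function can return a `RawData` object alongside its tables and figures and have it attached to the result as `result.raw_data`. A hedged sketch of a custom test doing so; the `@vm.test` registration decorator and the keyword-style `RawData` constructor are assumptions based on ValidMind's public custom-test API and are not shown in this diff.

```python
import validmind as vm
from validmind.vm_models.result import RawData


@vm.test("my_tests.DatasetSummaryWithRawData")  # registration decorator assumed, not shown in this diff
def DatasetSummaryWithRawData(dataset):
    # A one-row summary table plus a RawData payload for post-hoc inspection.
    summary = [{"Rows": len(dataset.df), "Columns": len(dataset.df.columns)}]
    # RawData is assumed to accept keyword arguments holding intermediate artifacts;
    # RawDataOutputHandler simply stores the returned object on result.raw_data.
    return summary, RawData(sample=dataset.df.head())
```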
validmind/tests/run.py
CHANGED
@@ -7,7 +7,7 @@ import subprocess
 import time
 from datetime import datetime
 from inspect import getdoc
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from uuid import uuid4

 from validmind import __version__
@@ -134,10 +134,9 @@ def _get_test_kwargs(
 def build_test_result(
     outputs: Union[Any, Tuple[Any, ...]],
     test_id: str,
+    test_doc: str,
     inputs: Dict[str, Union[VMInput, List[VMInput]]],
     params: Union[Dict[str, Any], None],
-    description: str,
-    generate_description: bool = True,
     title: Optional[str] = None,
 ):
     """Build a TestResult object from a set of raw test function outputs"""
@@ -149,6 +148,7 @@ def build_test_result(
         ref_id=ref_id,
         inputs=inputs,
         params=params if params else None,  # None if empty dict or None
+        doc=test_doc,
     )

     if not isinstance(outputs, tuple):
@@ -157,16 +157,6 @@ def build_test_result(
     for item in outputs:
         process_output(item, result)

-    result.description = get_result_description(
-        test_id=test_id,
-        test_description=description,
-        tables=result.tables,
-        figures=result.figures,
-        metric=result.metric,
-        should_generate=generate_description,
-        title=title,
-    )
-
     return result


@@ -177,7 +167,6 @@ def _run_composite_test(
     input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
     params: Union[Dict[str, Any], None],
     param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
-    generate_description: bool,
     title: Optional[str] = None,
 ):
     """Run a composite test i.e. a test made up of multiple metrics"""
@@ -199,6 +188,14 @@ def _run_composite_test(
     if not all(result.metric is not None for result in results):
         raise ValueError("All tests must return a metric when used as a composite test")

+    # Create composite docstring from all test results
+    composite_doc = "\n\n".join(
+        [
+            f"{test_id_to_name(result.result_id)}:\n{_test_description(result.doc)}"
+            for result in results
+        ]
+    )
+
     return build_test_result(
         outputs=[
             {
@@ -208,12 +205,9 @@ def _run_composite_test(
             for result in results
         ],  # pass in a single table with metric values as our 'outputs'
         test_id=test_id,
+        test_doc=composite_doc,
         inputs=results[0].inputs,
         params=results[0].params,
-        description="\n\n".join(
-            [_test_description(result.description, num_lines=1) for result in results]
-        ),  # join truncated (first line only) test descriptions
-        generate_description=generate_description,
         title=title,
     )

@@ -226,7 +220,6 @@ def _run_comparison_test(
     input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
     params: Union[Dict[str, Any], None],
     param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
-    generate_description: bool,
     title: Optional[str] = None,
 ):
     """Run a comparison test i.e. a test that compares multiple outputs of a test across
@@ -255,24 +248,43 @@ def _run_comparison_test(
     # composite tests have a test_id thats built from the name
     if not test_id:
         test_id = results[0].result_id
-
+        test_doc = results[0].doc
     else:
-
+        test_doc = describe_test(test_id, raw=True)["Description"]

     combined_outputs, combined_inputs, combined_params = combine_results(results)

     return build_test_result(
         outputs=tuple(combined_outputs),
         test_id=test_id,
+        test_doc=test_doc,
         inputs=combined_inputs,
         params=combined_params,
-        description=description,
-        generate_description=generate_description,
         title=title,
     )


-def
+def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
+    """Run a standard test and return a TestResult object"""
+    test_func = load_test(test_id)
+    input_kwargs, param_kwargs = _get_test_kwargs(
+        test_func=test_func,
+        inputs=inputs or {},
+        params=params or {},
+    )
+
+    raw_result = test_func(**input_kwargs, **param_kwargs)
+
+    return build_test_result(
+        outputs=raw_result,
+        test_id=test_id,
+        test_doc=getdoc(test_func),
+        inputs=input_kwargs,
+        params=param_kwargs,
+    )
+
+
+def run_test(  # noqa: C901
     test_id: Union[TestID, None] = None,
     name: Union[str, None] = None,
     unit_metrics: Union[List[TestID], None] = None,
@@ -283,6 +295,7 @@ def run_test(
     show: bool = True,
     generate_description: bool = True,
     title: Optional[str] = None,
+    post_process_fn: Union[Callable[[TestResult], None], None] = None,
     **kwargs,
 ) -> TestResult:
     """Run a ValidMind or custom test
@@ -306,6 +319,7 @@ def run_test(
         show (bool, optional): Whether to display results. Defaults to True.
         generate_description (bool, optional): Whether to generate a description. Defaults to True.
         title (str, optional): Custom title for the test result
+        post_process_fn (Callable[[TestResult], None], optional): Function to post-process the test result

     Returns:
         TestResult: A TestResult object containing the test results
@@ -343,7 +357,6 @@ def run_test(
             input_grid=input_grid,
             params=params,
             param_grid=param_grid,
-            generate_description=generate_description,
         )

     elif unit_metrics:
@@ -357,43 +370,28 @@ def run_test(
             input_grid=input_grid,
             params=params,
             param_grid=param_grid,
-            generate_description=generate_description,
-            title=title,
-        )
-
-    elif input_grid or param_grid:
-        result = _run_comparison_test(
-            test_id=test_id,
-            inputs=inputs,
-            input_grid=input_grid,
-            params=params,
-            param_grid=param_grid,
-            generate_description=generate_description,
             title=title,
         )

     else:
-
-
-        input_kwargs, param_kwargs = _get_test_kwargs(
-            test_func, inputs or {}, params or {}
-        )
-
-        raw_result = test_func(**input_kwargs, **param_kwargs)
-
-        result = build_test_result(
-            outputs=raw_result,
-            test_id=test_id,
-            inputs=input_kwargs,
-            params=param_kwargs,
-            description=getdoc(test_func),
-            generate_description=generate_description,
-            title=title,
-        )
+        result = _run_test(test_id, inputs, params)

     end_time = time.perf_counter()
     result.metadata = _get_run_metadata(duration_seconds=end_time - start_time)

+    if post_process_fn:
+        result = post_process_fn(result)
+
+    result.description = get_result_description(
+        test_id=test_id,
+        test_description=result.doc,
+        tables=result.tables,
+        figures=result.figures,
+        metric=result.metric,
+        should_generate=generate_description,
+        title=title,
+    )
+
     if show:
         result.show()

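The practical effect of these changes is that description generation now happens once, at the end of `run_test`, and the new `post_process_fn` hook runs just before it. Note that `run_test` reassigns its return value (`result = post_process_fn(result)`), so despite the `Callable[[TestResult], None]` annotation the callable should return the (possibly modified) `TestResult`. A minimal sketch, with `vm_model` / `vm_dataset` assumed to be pre-initialized inputs and the post-processor purely hypothetical:

```python
from validmind.tests import run_test


def strip_figures(result):
    # Hypothetical post-processor: drop figures before the description is
    # generated. It must return the TestResult because run_test reassigns
    # its return value.
    result.figures = []
    return result


result = run_test(
    "validmind.model_validation.sklearn.ROCCurve",  # test ID inferred from this package's file list
    inputs={"model": vm_model, "dataset": vm_dataset},
    post_process_fn=strip_figures,
    generate_description=False,
)
```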
validmind/utils.py
CHANGED
@@ -168,6 +168,17 @@ class NumpyEncoder(json.JSONEncoder):
         return super().iterencode(obj, _one_shot)


+class HumanReadableEncoder(NumpyEncoder):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # truncate ndarrays to 10 items
+        self.type_handlers[self.is_numpy_ndarray] = lambda obj: (
+            obj.tolist()[:5] + ["..."] + obj.tolist()[-5:]
+            if len(obj) > 10
+            else obj.tolist()
+        )
+
+
 def get_full_typename(o: Any) -> Any:
     """We determine types based on type names so we don't have to import
     (and therefore depend on) PyTorch, TensorFlow, etc.
@@ -448,18 +459,23 @@ def get_dataset_info(dataset):


 def preview_test_config(config):
-
+    """Preview test configuration in a collapsible HTML section.
+
+    Args:
+        config (dict): Test configuration dictionary
+    """
+
+    try:
+        formatted_json = json.dumps(serialize(config), indent=4)
+    except TypeError as e:
+        logger.error(f"JSON serialization failed: {e}")
+        return

-    # JavaScript + HTML for the collapsible section
     collapsible_html = f"""
     <script>
     function toggleOutput() {{
         var content = document.getElementById("collapsibleContent");
-
-        content.style.display = "block";
-    }} else {{
-        content.style.display = "none";
-    }}
+        content.style.display = content.style.display === "none" ? "block" : "none";
     }}
     </script>
     <button onclick="toggleOutput()">Preview Config</button>
@@ -545,3 +561,14 @@ def inspect_obj(obj):
     # Loop through the parameters and print detailed information
     for param_name, param in sig.parameters.items():
         print(f"{param_name} - ({param.default})")
+
+
+def serialize(obj):
+    """Convert objects to JSON-serializable format with readable descriptions."""
+    if isinstance(obj, dict):
+        return {k: serialize(v) for k, v in obj.items()}
+    elif isinstance(obj, (list, tuple)):
+        return [serialize(x) for x in obj]
+    elif isinstance(obj, (pd.DataFrame, pd.Series)):
+        return ""  # Simple empty string for non-serializable objects
+    return obj
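The new `serialize` helper is what lets `preview_test_config` tolerate configs that embed pandas objects: dicts and lists are walked recursively and DataFrames/Series are blanked out before `json.dumps` is attempted. A small, self-contained illustration of the behavior as defined above:

```python
import pandas as pd

from validmind.utils import serialize

config = {
    "params": {"n_bins": 10, "thresholds": [0.3, 0.5]},
    "inputs": {"dataset": pd.DataFrame({"score": [620, 710]})},
}

# pandas objects are replaced with "" so json.dumps no longer raises TypeError
print(serialize(config))
# {'params': {'n_bins': 10, 'thresholds': [0.3, 0.5]}, 'inputs': {'dataset': ''}}
```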
validmind/vm_models/figure.py
CHANGED
@@ -33,6 +33,18 @@ def is_png_image(figure) -> bool:
     return isinstance(figure, bytes)


+def create_figure(
+    figure: Union[matplotlib.figure.Figure, go.Figure, go.FigureWidget, bytes],
+    key: str,
+    ref_id: str,
+) -> "Figure":
+    """Create a VM Figure object from a raw figure object"""
+    if is_matplotlib_figure(figure) or is_plotly_figure(figure) or is_png_image(figure):
+        return Figure(key=key, figure=figure, ref_id=ref_id)
+
+    raise ValueError(f"Unsupported figure type: {type(figure)}")
+
+
 @dataclass
 class Figure:
     """
@@ -55,6 +67,9 @@ class Figure:
     ):
         self.figure = go.FigureWidget(self.figure)

+    def __repr__(self):
+        return f"Figure(key={self.key}, ref_id={self.ref_id})"
+
     def to_widget(self):
         """
         Returns the ipywidget compatible representation of the figure. Ideally
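`create_figure` is a small guard in front of the `Figure` constructor: it accepts matplotlib figures, Plotly figures/widgets, or raw PNG bytes and raises on anything else, while the new `__repr__` gives figures a readable identity. A minimal sketch of how it behaves (the key and ref_id values are arbitrary placeholders):

```python
import matplotlib.pyplot as plt

from validmind.vm_models.figure import create_figure

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])

vm_figure = create_figure(fig, key="example_figure", ref_id="example-ref")
print(vm_figure)  # Figure(key=example_figure, ref_id=example-ref) via the new __repr__

# Anything that is not a matplotlib/Plotly figure or PNG bytes raises ValueError
try:
    create_figure("not a figure", key="bad", ref_id="example-ref")
except ValueError as exc:
    print(exc)
```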
validmind/vm_models/result/__init__.py
CHANGED
@@ -2,6 +2,6 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from .result import ErrorResult, Result, ResultTable, TestResult
+from .result import ErrorResult, RawData, Result, ResultTable, TestResult

-__all__ = ["ErrorResult", "Result", "ResultTable", "TestResult"]
+__all__ = ["ErrorResult", "RawData", "Result", "ResultTable", "TestResult"]