validmind 2.2.2__py3-none-any.whl → 2.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai.py +70 -47
- validmind/api_client.py +40 -13
- validmind/errors.py +1 -1
- validmind/tests/__init__.py +1 -1
- validmind/tests/decorator.py +10 -5
- validmind/unit_metrics/composite.py +8 -6
- validmind/utils.py +44 -1
- validmind/vm_models/dataset/dataset.py +4 -2
- validmind/vm_models/test/metric.py +9 -24
- validmind/vm_models/test/result_wrapper.py +64 -5
- validmind/vm_models/test/threshold_test.py +10 -28
- {validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/METADATA +1 -1
- {validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/RECORD +17 -17
- {validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/LICENSE +0 -0
- {validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/WHEEL +0 -0
- {validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2.2.2"
+__version__ = "2.2.4"
validmind/ai.py
CHANGED
@@ -8,45 +8,65 @@ import os
 from openai import AzureOpenAI, OpenAI
 
 SYSTEM_PROMPT = """
-You are an expert data scientist and MRM specialist
-
+You are an expert data scientist and MRM specialist.
+You are tasked with analyzing the results of a quantitative test run on some model or dataset.
+Your goal is to create a test description that will act as part of the model documentation.
+You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
+The overarching theme to maintain is MRM documentation.
 
-Examine the provided statistical test results and compose a
-
-
-these statistics might influence the development and performance of a predictive model. Identify
-and explain any discernible trends or anomalies in the test results.
-
-Your analysis will act as the description of the result in the model documentation.
+Examine the provided statistical test results and compose a description of the results.
+This will act as the description and interpretation of the result in the model documentation.
+It will be displayed alongside the test results table and figures.
 
 Avoid long sentences and complex vocabulary.
 Structure the response clearly and logically.
-Use valid Markdown syntax to format the response
+Use valid Markdown syntax to format the response.
+Respond only with your analysis and insights, not the verbatim test results.
+Respond only with the markdown content, no explanation or context for your response is necessary.
 Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
+
+Explain the test, its purpose, its mechanism/formula etc and why it is useful.
+If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
+Highlight the key insights from the test results. The key insights should be concise and easily understood.
+End the response with any closing remarks, summary or additional useful information.
+
 Use the following format for the response (feel free to modify slightly if necessary):
 ```
-**<Test Name>** <continue to explain what it does in detail>...
+**<Test Name>** calculates the xyz <continue to explain what it does in detail>...
+
+This test is useful for <explain why and for what this test is useful>...
 
-
+**Key Insights:**
 
-
+The following key insights can be identified in the test results:
 
-- **<key insight 1 - title>**: <explanation of key insight 1>
+- **<key insight 1 - title>**: <concise explanation of key insight 1>
 - ...<continue with any other key insights using the same format>
 ```
 It is very important that the text is nicely formatted and contains enough information to be useful to the user as documentation.
 """.strip()
+
+
 USER_PROMPT = """
-Test ID: {test_name}
-
-Test
-{
-Test
+Test ID: `{test_name}`
+
+<Test Docstring>
+{test_description}
+</Test Docstring>
+
+<Test Results Summary>
 {test_summary}
+</Test Results Summary>
 """.strip()
+
+
 USER_PROMPT_FIGURES = """
-Test ID: {test_name}
-
+Test ID: `{test_name}`
+
+<Test Docstring>
+{test_description}
+</Test Docstring>
+
 The attached plots show the results of the test.
 """.strip()
 
@@ -113,21 +133,40 @@ class DescriptionFuture:
 def generate_description_async(
     test_name: str,
     test_description: str,
-    test_results: str,
     test_summary: str,
     figures: list = None,
 ):
     """Generate the description for the test results"""
-
+    if not test_summary and not figures:
+        raise ValueError("No summary or figures provided - cannot generate description")
 
+    client, _ = __get_client_and_model()
     # get last part of test id
     test_name = test_name.split(".")[-1]
 
-    if
-
-
+    if test_summary:
+        return (
+            client.chat.completions.create(
+                model="gpt-4o",
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {
+                        "role": "user",
+                        "content": USER_PROMPT.format(
+                            test_name=test_name,
+                            test_description=test_description,
+                            test_summary=test_summary,
+                        ),
+                    },
+                ],
+            )
+            .choices[0]
+            .message.content.strip("```")
+            .strip()
+        )
 
-
+    return (
+        client.chat.completions.create(
             model="gpt-4o",
             messages=[
                 {"role": "system", "content": SYSTEM_PROMPT},
@@ -154,30 +193,15 @@ def generate_description_async(
                 },
             ],
         )
-
-
-
-
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {
-                    "role": "user",
-                    "content": USER_PROMPT.format(
-                        test_name=test_name,
-                        test_description=test_description,
-                        test_results=test_results,
-                        test_summary=test_summary,
-                    ),
-                },
-            ],
-        )
-
-    return response.choices[0].message.content.strip("```").strip()
+        .choices[0]
+        .message.content.strip("```")
+        .strip()
+    )
 
 
 def generate_description(
     test_name: str,
     test_description: str,
-    test_results: str,
     test_summary: str,
     figures: list = None,
 ):
@@ -185,7 +209,6 @@ def generate_description(
         generate_description_async,
         test_name,
         test_description,
-        test_results,
         test_summary,
         figures,
     )
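For context, the net effect of these changes is that `generate_description_async` drops the `test_results` argument and now fails fast when neither a summary nor figures are supplied. A minimal sketch of the new call, assuming OpenAI credentials are configured; the test ID and summary string are illustrative, not from the package:

```python
from validmind.ai import generate_description_async

# Illustrative values; a real call passes the serialized ResultSummary.
description = generate_description_async(
    test_name="validmind.data_validation.ClassImbalance",
    test_description="Evaluates the balance of classes in the target column.",
    test_summary='[{"Class": "0", "Percentage": 90.0}, {"Class": "1", "Percentage": 10.0}]',
)

# Calling with neither a summary nor figures now raises:
# ValueError: No summary or figures provided - cannot generate description
```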
validmind/api_client.py
CHANGED
@@ -161,14 +161,20 @@ def __ping() -> Dict[str, Any]:
 
     init_sentry(client_info.get("sentry_config", {}))
 
+    # Only show this confirmation the first time we connect to the API
+    ack_connected = False
+    if client_config.project is None:
+        ack_connected = True
+
     client_config.project = client_info["project"]
     client_config.documentation_template = client_info.get("documentation_template", {})
     client_config.feature_flags = client_info.get("feature_flags", {})
 
-
-
-
-
+    if ack_connected:
+        logger.info(
+            f"Connected to ValidMind. Project: {client_config.project['name']}"
+            f" ({client_config.project['cuid']})"
+        )
 
 
 def reload():
@@ -358,7 +364,11 @@ async def log_metadata(
 
 
 async def log_metrics(
-    metrics: List[MetricResult],
+    metrics: List[MetricResult],
+    inputs: List[str],
+    output_template: str = None,
+    section_id: str = None,
+    position: int = None,
 ) -> Dict[str, Any]:
     """Logs metrics to ValidMind API.
 
@@ -366,6 +376,8 @@ async def log_metrics(
         metrics (list): A list of MetricResult objects
         inputs (list): A list of input keys (names) that were used to run the test
        output_template (str): The optional output template for the test
+        section_id (str): The section ID add a test driven block to the documentation
+        position (int): The position in the section to add the test driven block
 
     Raises:
         Exception: If the API call fails
@@ -373,7 +385,14 @@ async def log_metrics(
     Returns:
         dict: The response from the API
     """
+    params = {}
+    if section_id:
+        params["section_id"] = section_id
+    if position is not None:
+        params["position"] = position
+
     data = []
+
     for metric in metrics:
         metric_data = {
             **metric.serialize(),
@@ -388,6 +407,7 @@ async def log_metrics(
     try:
         return await _post(
             "log_metrics",
+            params=params,
             data=json.dumps(data, cls=NumpyEncoder, allow_nan=False),
         )
     except Exception as e:
@@ -396,7 +416,10 @@ async def log_metrics(
 
 
 async def log_test_result(
-    result: ThresholdTestResults,
+    result: ThresholdTestResults,
+    inputs: List[str],
+    section_id: str = None,
+    position: int = None,
 ) -> Dict[str, Any]:
     """Logs test results information
 
@@ -406,8 +429,8 @@ async def log_test_result(
     Args:
         result (validmind.ThresholdTestResults): A ThresholdTestResults object
         inputs (list): A list of input keys (names) that were used to run the test
-
-
+        section_id (str, optional): The section ID add a test driven block to the documentation
+        position (int): The position in the section to add the test driven block
 
     Raises:
         Exception: If the API call fails
@@ -415,10 +438,16 @@ async def log_test_result(
     Returns:
         dict: The response from the API
     """
+    params = {}
+    if section_id:
+        params["section_id"] = section_id
+    if position is not None:
+        params["position"] = position
+
     try:
         return await _post(
             "log_test_results",
-            params=
+            params=params,
             data=json.dumps(
                 {
                     **result.serialize(),
@@ -434,7 +463,7 @@ async def log_test_result(
 
 
 def log_test_results(
-    results: List[ThresholdTestResults], inputs
+    results: List[ThresholdTestResults], inputs
 ) -> List[Callable[..., Dict[str, Any]]]:
     """Logs test results information
 
@@ -444,8 +473,6 @@ def log_test_results(
     Args:
         results (list): A list of ThresholdTestResults objects
         inputs (list): A list of input keys (names) that were used to run the test
-        dataset_type (str, optional): The type of dataset. Can be one of "training",
-            "test", or "validation". Defaults to "training".
 
     Raises:
         Exception: If the API call fails
@@ -456,7 +483,7 @@ def log_test_results(
     try:
         responses = []  # TODO: use asyncio.gather
         for result in results:
-            responses.append(run_async(log_test_result, result, inputs
+            responses.append(run_async(log_test_result, result, inputs))
     except Exception as e:
         logger.error("Error logging test results to ValidMind API")
         raise e
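A sketch of how the new placement parameters are intended to flow through the client; the section ID and inputs are illustrative, and construction of the `result` object is elided:

```python
from validmind.api_client import log_test_result
from validmind.utils import run_async

# `result` is an existing ThresholdTestResults object (construction elided).
run_async(
    log_test_result,
    result,
    ["raw_dataset"],     # inputs used to run the test
    "data_preparation",  # section_id: target section in the documentation template
    0,                   # position: index of the test-driven block within the section
)
```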
validmind/errors.py
CHANGED
@@ -339,7 +339,7 @@ def raise_api_error(error_string):
     try:
         json_response = json.loads(error_string)
         api_code = json_response.get("code")
-        api_description = json_response.get("description")
+        api_description = json_response.get("description", json_response.get("message"))
     except json.decoder.JSONDecodeError:
         api_code = "unknown"
         api_description = error_string
validmind/tests/__init__.py
CHANGED
@@ -388,7 +388,7 @@ def describe_test(test_id: str = None, raw: bool = False, show: bool = True):
             ),
             table_display="table" if details["Params"] else "none",
             example_inputs=json.dumps(
-                {name: f"my_vm_{name}" for name in details["Required Inputs"]},
+                {name: f"my_vm_{name}" for name in (details["Required Inputs"] or [])},
                 indent=4,
             ),
             example_params=json.dumps(details["Params"] or {}, indent=4, cls=NumpyEncoder),
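The `or []` guard matters because tests without required inputs store `None`, which is not iterable. A minimal illustration:

```python
details = {"Required Inputs": None}  # e.g. a test that takes no inputs

# 2.2.2: TypeError ('NoneType' object is not iterable)
# 2.2.4: falls back to an empty list and produces {}
example_inputs = {name: f"my_vm_{name}" for name in (details["Required Inputs"] or [])}
print(example_inputs)  # {}
```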
validmind/tests/decorator.py
CHANGED
@@ -15,6 +15,7 @@ import pandas as pd
 
 from validmind.errors import MissingRequiredTestInputError
 from validmind.logging import get_logger
+from validmind.utils import get_description_metadata
 from validmind.vm_models import (
     Metric,
     MetricResult,
@@ -113,20 +114,24 @@ def _build_result(results, test_id, description, output_template, inputs):  # noqa
     else:
         process_item(results)
 
+    result_summary = ResultSummary(results=tables)
+
     return MetricResultWrapper(
         result_id=test_id,
         metric=MetricResult(
             key=test_id,
             ref_id=ref_id,
             value="Empty",
-            summary=
+            summary=result_summary,
         ),
         figures=figures,
         result_metadata=[
-
-
-
-
+            get_description_metadata(
+                test_id=test_id,
+                default_description=description,
+                summary=result_summary.serialize(),
+                figures=figures,
+            )
        ],
        inputs=inputs,
        output_template=output_template,
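The refactor builds the `ResultSummary` once and reuses it for both the `MetricResult` and the description metadata. A sketch of that shape, with made-up table data:

```python
from validmind.vm_models.test.result_summary import ResultSummary, ResultTable

tables = [ResultTable(data=[{"Metric": "Accuracy", "Value": 0.93}])]  # illustrative
result_summary = ResultSummary(results=tables)

# The same object now feeds both MetricResult(summary=...) and
# get_description_metadata(summary=result_summary.serialize(), ...).
serialized = result_summary.serialize()
```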
validmind/unit_metrics/composite.py
CHANGED
@@ -8,7 +8,7 @@ from uuid import uuid4
 
 from ..logging import get_logger
 from ..tests.decorator import _inspect_signature
-from ..utils import run_async, test_id_to_name
+from ..utils import get_description_metadata, run_async, test_id_to_name
 from ..vm_models.test.metric import Metric
 from ..vm_models.test.metric_result import MetricResult
 from ..vm_models.test.result_summary import ResultSummary, ResultTable
@@ -200,13 +200,15 @@ def run_metrics(
     </style>
     """
 
+    result_summary = ResultSummary(results=[ResultTable(data=[results])])
     result_wrapper = MetricResultWrapper(
         result_id=test_id,
         result_metadata=[
-
-
-
-
+            get_description_metadata(
+                test_id=test_id,
+                default_description=description,
+                summary=result_summary.serialize(),
+            ),
             {
                 "content_id": f"composite_metric_def:{test_id}:unit_metrics",
                 "json": metric_ids,
@@ -222,7 +224,7 @@ def run_metrics(
             key=test_id,
             ref_id=str(uuid4()),
             value=results,
-            summary=
+            summary=result_summary,
         ),
     )
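For composite metrics the same pattern applies, except the summary is a single-row table of unit metric values. A sketch with made-up numbers:

```python
from validmind.vm_models.test.result_summary import ResultSummary, ResultTable

# `results` maps unit metric names to computed values (values are illustrative).
results = {"F1": 0.91, "Precision": 0.89, "Recall": 0.93}
result_summary = ResultSummary(results=[ResultTable(data=[results])])
```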
validmind/utils.py
CHANGED
@@ -6,6 +6,7 @@ import asyncio
 import difflib
 import json
 import math
+import os
 import re
 import sys
 from platform import python_version
@@ -25,6 +26,7 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
 from numpy import ndarray
 from tabulate import tabulate
 
+from .ai import generate_description
 from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
 
 DEFAULT_BIG_NUMBER_DECIMALS = 2
@@ -432,7 +434,9 @@ def display(widget_or_html, syntax_highlighting=True, mathjax=True):
 def md_to_html(md: str, mathml=False) -> str:
     """Converts Markdown to HTML using mistune with plugins"""
     # use mistune with math plugin to convert to html
-    html = mistune.create_markdown(
+    html = mistune.create_markdown(
+        plugins=["math", "table", "strikethrough", "footnotes"]
+    )(md)
 
     if not mathml:
         # return the html as is (with latex that will be rendered by MathJax)
@@ -453,3 +457,42 @@ def md_to_html(md: str, mathml=False) -> str:
     )
 
     return html
+
+
+def get_description_metadata(test_id, default_description, summary=None, figures=None):
+    """Get Metadata Dictionary for a Test or Metric Result
+
+    Generates an LLM interpretation of the test results or uses the default
+    description and returns a metadata object that can be logged with the test results.
+
+    To enable LLM-generated descriptions, set the VALIDMIND_LLM_DESCRIPTIONS_ENABLED
+    environment variable to "true". The default description will be used if LLM
+    descriptions are disabled.
+
+    Note: Either the summary or figures must be provided to generate the description.
+
+    Args:
+        test_id (str): The test ID
+        default_description (str): The default description for the test
+        summary (Any): The test summary or results to interpret
+        figures (List[Figure]): The figures to attach to the test suite result
+
+    Returns:
+        dict: The metadata object to be logged with the test results
+    """
+    if os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower() == "true":
+        revision_name = "Generated by ValidMind AI"
+        description = generate_description(
+            test_name=test_id,
+            test_description=default_description,
+            test_summary=summary,
+            figures=figures,
+        )
+    else:
+        revision_name = "Default Description"
+        description = default_description
+
+    return {
+        "content_id": f"metric_description:{test_id}::{revision_name}",
+        "text": description,
+    }
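A sketch of the two paths through the new `get_description_metadata` helper; the test ID and summary are illustrative:

```python
import os

from validmind.utils import get_description_metadata

# Default path: no LLM call is made and the provided description is used verbatim.
os.environ.pop("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", None)

metadata = get_description_metadata(
    test_id="validmind.data_validation.ClassImbalance",
    default_description="Evaluates the balance of classes in the target column.",
    summary='[{"Class": "0", "Percentage": 90.0}]',
)
print(metadata["content_id"])
# metric_description:validmind.data_validation.ClassImbalance::Default Description

# Setting VALIDMIND_LLM_DESCRIPTIONS_ENABLED=true (with OpenAI credentials
# configured) switches to the "Generated by ValidMind AI" revision instead.
```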
validmind/vm_models/dataset/dataset.py
CHANGED
@@ -114,7 +114,7 @@ class VMDataset:
         if model:
             self.assign_predictions(model)
 
-    def _set_feature_columns(self, feature_columns):
+    def _set_feature_columns(self, feature_columns=None):
         if feature_columns is not None and (
             not isinstance(feature_columns, list)
             or not all(isinstance(col, str) for col in feature_columns)
@@ -269,7 +269,9 @@ class VMDataset:
             column_name (str): The name of the extra column.
             column_values (np.ndarray, optional): The values of the extra column.
         """
-        if column_name not in self.columns and
+        if column_name not in self.columns and (
+            column_values is None or len(column_values) == 0
+        ):
             raise ValueError(
                 "Column values must be provided when the column doesn't exist in the dataset"
            )
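The tightened check now rejects empty arrays as well as `None`. A hypothetical usage sketch; `vm_dataset` is an existing `VMDataset` (setup elided) and the method name is assumed from the surrounding docstring ("extra column"):

```python
import numpy as np

# Hypothetical call: adding a column that does not exist in the dataset
# without providing any values.
vm_dataset.add_extra_column("risk_score", column_values=np.array([]))
# 2.2.2: the empty array slipped past the check
# 2.2.4: ValueError: Column values must be provided when the column
#        doesn't exist in the dataset
```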
validmind/vm_models/test/metric.py
CHANGED
@@ -6,15 +6,14 @@
 Class for storing ValidMind metric objects and associated
 data for display and reporting purposes
 """
-import os
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
-from ...ai import generate_description
 from ...errors import MissingCacheResultsArgumentsError
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -83,30 +82,16 @@ class Metric(Test):
             summary=self.summary(metric_value),
         )
 
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=metric.serialize()["value"],
-                test_summary=metric.serialize()["summary"],
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-
-        description_metadata = {
-            "content_id": f"metric_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
-
         self.result = MetricResultWrapper(
             result_id=self.test_id,
-            result_metadata=[
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=metric.serialize()["summary"],
+                    figures=figures,
+                )
+            ],
             metric=metric,
             figures=figures,
             inputs=self.get_accessed_inputs(),
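One behavioral difference worth noting: 2.2.2 sent only the first line of the test docstring to the LLM, while the shared helper now passes the full description. Illustrated with a made-up docstring:

```python
description = "Evaluates class balance.\n\nDetailed explanation of the test..."

old_llm_input = description.splitlines()[0]  # 2.2.2: "Evaluates class balance."
new_llm_input = description                  # 2.2.4: the entire docstring
```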
validmind/vm_models/test/result_wrapper.py
CHANGED
@@ -155,6 +155,55 @@ class ResultWrapper(ABC):
             tables.append(HTML(value=summary_table))
         return tables
 
+    def _validate_section_id_for_block(self, section_id: str, position: int = None):
+        """
+        Validate the section_id exits on the template before logging. We validate
+        if the section exists and if the user provided position is within the bounds
+        of the section. When the position is None, we assume it goes to the end of the section.
+        """
+        if section_id is None:
+            return
+
+        api_client.reload()
+        found = False
+        client_config = api_client.client_config
+
+        for section in client_config.documentation_template["sections"]:
+            if section["id"] == section_id:
+                found = True
+                break
+
+        if not found:
+            raise ValueError(
+                f"Section with id {section_id} not found in the model's document"
+            )
+
+        # Check if the block already exists in the section
+        block_definition = {
+            "content_id": self.result_id,
+            "content_type": (
+                "metric" if isinstance(self, MetricResultWrapper) else "test"
+            ),
+        }
+        blocks = section.get("contents", [])
+        for block in blocks:
+            if (
+                block["content_id"] == block_definition["content_id"]
+                and block["content_type"] == block_definition["content_type"]
+            ):
+                logger.info(
+                    f"Test driven block with content_id {block_definition['content_id']} already exists in the document's section"
+                )
+                return
+
+        # Validate that the position is within the bounds of the section
+        if position is not None:
+            num_blocks = len(blocks)
+            if position < 0 or position > num_blocks:
+                raise ValueError(
+                    f"Invalid position {position}. Must be between 0 and {num_blocks}"
+                )
+
     def show(self):
         """Display the result... May be overridden by subclasses"""
         display(self.to_widget())
@@ -164,9 +213,11 @@ class ResultWrapper(ABC):
         """Log the result... Must be overridden by subclasses"""
         raise NotImplementedError
 
-    def log(self):
+    def log(self, section_id: str = None, position: int = None):
         """Log the result... May be overridden by subclasses"""
-
+
+        self._validate_section_id_for_block(section_id, position)
+        run_async(self.log_async, section_id=section_id, position=position)
 
 
 @dataclass
@@ -327,7 +378,9 @@ class MetricResultWrapper(ResultWrapper):
 
         return self.metric.summary
 
-    async def log_async(
+    async def log_async(
+        self, section_id: str = None, position: int = None, unsafe=False
+    ):
         tasks = []  # collect tasks to run in parallel (async)
 
         if self.metric:
@@ -339,6 +392,8 @@ class MetricResultWrapper(ResultWrapper):
                     metrics=[self.metric],
                     inputs=self.inputs,
                     output_template=self.output_template,
+                    section_id=section_id,
+                    position=position,
                 )
             )
 
@@ -433,8 +488,12 @@ class ThresholdTestResultWrapper(ResultWrapper):
 
         return VBox(vbox_children)
 
-    async def log_async(self):
-        tasks = [
+    async def log_async(self, section_id: str = None, position: int = None):
+        tasks = [
+            api_client.log_test_result(
+                self.test_results, self.inputs, section_id, position
+            )
+        ]
 
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
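Putting it together, a sketch of the user-facing API these changes enable; the test ID and section ID are illustrative and `vm_dataset` setup is elided:

```python
import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": vm_dataset},
)

# New in 2.2.4: place the result as a test-driven block in a specific
# documentation section; an invalid section_id or out-of-range position
# now raises ValueError before anything is logged.
result.log(section_id="data_preparation", position=0)
```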
validmind/vm_models/test/threshold_test.py
CHANGED
@@ -8,11 +8,10 @@ Test (as test_results) but we'll refer to it as a ThresholdTest to
 avoid confusion with the "tests" in the general data science/modeling sense.
 """
 
-import os
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
@@ -79,30 +78,16 @@ class ThresholdTest(Test):
         """
         result_summary = self.summary(test_results_list, passed)
 
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=[result.serialize() for result in test_results_list],
-                test_summary=result_summary.serialize(),
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-
-        description_metadata = {
-            "content_id": f"test_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
-
         self.result = ThresholdTestResultWrapper(
             result_id=self.test_id,
-            result_metadata=[
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=result_summary.serialize(),
+                    figures=figures,
+                )
+            ],
             inputs=self.get_accessed_inputs(),
             test_results=ThresholdTestResults(
                 test_name=self.test_id,
@@ -112,10 +97,7 @@ class ThresholdTest(Test):
                 results=test_results_list,
                 summary=result_summary,
             ),
+            figures=figures,
         )
 
-        # Allow test results to attach figures to the test suite result
-        if figures:
-            self.result.figures = figures
-
         return self.result
{validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 validmind/__init__.py,sha256=XqPjCbFMvEYl0cIT42EZKP7DFMYDC7KDW6syo8MGkDg,3682
-validmind/__version__.py,sha256=
-validmind/ai.py,sha256=
-validmind/api_client.py,sha256=
+validmind/__version__.py,sha256=7vcEgCSkbhT9Gg0A1zz0ATFhMnUIE2MAurmV3fHzG8g,22
+validmind/ai.py,sha256=1cXAAoUjH97YL71xDpNscq3voXZp0ODAdeEU4R3RQjY,7031
+validmind/api_client.py,sha256=A8RLYFdRGdffXkd1qTa0o2_yy6e491N1o17KHHXmb8I,16035
 validmind/client.py,sha256=S_FozHlMJBgF8IQJES27LeFoYcoCcGZ6dkxE8adyIRQ,18607
 validmind/client_config.py,sha256=58L6s6-9vFWC9vkSs_98CjV1YWmlksdhblJtPQxQsAk,1611
 validmind/datasets/__init__.py,sha256=oYfcvW7BAyUgpghBOnTeGbQF6tpFAWg38rRirdLr8m8,262
@@ -55,7 +55,7 @@ validmind/datasets/regression/models/fred_loan_rates_model_2.pkl,sha256=J1ukMdeF
 validmind/datasets/regression/models/fred_loan_rates_model_3.pkl,sha256=IogZPcUQc1F_v11fR6KWT-nRt5JzvK5f7p4Hrw7vLps,40063
 validmind/datasets/regression/models/fred_loan_rates_model_4.pkl,sha256=cSxhpcrI4hCbxCwZwE2-nr7KObbWpDii3NzpECoXmmM,48292
 validmind/datasets/regression/models/fred_loan_rates_model_5.pkl,sha256=FkNLHq9xkPMbYks_vyMjFL371mw9SQYbP1iX9lY4Ljo,60343
-validmind/errors.py,sha256=
+validmind/errors.py,sha256=qy7Gp6Uom5J6WmLw-CpE5zaTN96SiN7kJjDGBaJdoxY,8023
 validmind/html_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 validmind/html_templates/content_blocks.py,sha256=AHQ5MlhR1JYldel7soo5ztpTJJ5-kYtyKPBmh-vwxuI,3997
 validmind/input_registry.py,sha256=zexO3x-vncaoWvQ6VfkvgDLn6x72e2BNel_jCbrVHSE,793
@@ -83,7 +83,7 @@ validmind/test_suites/summarization.py,sha256=unEpfk8_etLMuYAtSmBzlqrUpo0kd9Vc3S
 validmind/test_suites/tabular_datasets.py,sha256=WE4eLzRCfiqAxRqXnZFRR3Lo_u-TI6KM6hmTbR8rg5o,1798
 validmind/test_suites/text_data.py,sha256=YGVGBB05356jN9Gzcy5CHShRzo1fm5mKsZY7YBq0cYU,739
 validmind/test_suites/time_series.py,sha256=msUyYySAe5VHJJp6z0k0cNt2ekMB8-XkxGER75Zs1hs,6724
-validmind/tests/__init__.py,sha256=
+validmind/tests/__init__.py,sha256=n22VyCpZD3xEHkJI2-sb5cSK-fNanL2v6b1uZIav0fc,15921
 validmind/tests/data_validation/ACFandPACFPlot.py,sha256=__JowNXtc511g_g8VXc0IX7j6qBE5J_v7IoWUKmj_E8,4745
 validmind/tests/data_validation/ANOVAOneWayTable.py,sha256=udizp4rxW4VlMaXK2RrkPK5tAUgO0C-A3MIifPjBZMw,6019
 validmind/tests/data_validation/AutoAR.py,sha256=kulDh8i7p7CjenVvXS54kfZe53M-eXzQ7aHZylnA06M,6676
@@ -146,7 +146,7 @@ validmind/tests/data_validation/nlp/StopWords.py,sha256=YAwXyfoCLR8jLm7dlXknSpgU
 validmind/tests/data_validation/nlp/TextDescription.py,sha256=27u4xpFX-FYuMcDVRkw3p1ajcCzd5TgaVkPIqOi8GJc,8718
 validmind/tests/data_validation/nlp/Toxicity.py,sha256=M_ksbd-R8AQjEqhniUETn3iC7zwSbf3xUnwh8OhgXhE,1514
 validmind/tests/data_validation/nlp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-validmind/tests/decorator.py,sha256=
+validmind/tests/decorator.py,sha256=0R6EMNKc7lZT7qNiWgzjC1OkvCHpf-qtncbXv4hTKjk,9541
 validmind/tests/model_validation/BertScore.py,sha256=XJss9sqrkkkNkT44vIf6s5ID_M87PPjWJ-pN0bCTe2E,5300
 validmind/tests/model_validation/BleuScore.py,sha256=UmlOsr2chqSEcOV6OtuJey9Cb3qvBD6hDOuvLg2X9_s,4925
 validmind/tests/model_validation/ClusterSizeDistribution.py,sha256=IKcMBCBsasbi6i8LTqv0H6PUUsG8FXnBtc15uj209WY,4155
@@ -267,7 +267,7 @@ validmind/unit_metrics/classification/sklearn/F1.py,sha256=Uiq5sPyNpALhApTkmLUhh
 validmind/unit_metrics/classification/sklearn/Precision.py,sha256=8zO5VDZhfT8R2VFYiV-CzsZwhsTwVAKca4nhD-qALLw,458
 validmind/unit_metrics/classification/sklearn/ROC_AUC.py,sha256=5-i1xhrLg7Ix4sk7pBKDBtlqBCNRD365LnTvsekSVYs,452
 validmind/unit_metrics/classification/sklearn/Recall.py,sha256=0WG3A6K9M1UmbWQKoS_wwLfq-cXVDDTIA1ZpaJNyKp8,449
-validmind/unit_metrics/composite.py,sha256=
+validmind/unit_metrics/composite.py,sha256=wuOTBnj-eNOBIkCdJ589gr0TEwl0i3TLvqJ1BwwJ9KI,7971
 validmind/unit_metrics/regression/GiniCoefficient.py,sha256=ebh1rOob8mEmQp0EpXcneAXjc4AIfm6O3Y0_mnTahKA,984
 validmind/unit_metrics/regression/HuberLoss.py,sha256=JAUxKFpXp1NtQKEJMZlGgxDlk8pFT1tY3ZcxNQPDhHM,680
 validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py,sha256=DRHuFH3DqDMy56tzkN8ETwt36FCu1m-nGxK0OJCPMDk,981
@@ -279,28 +279,28 @@ validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py,sha256=LCNgpDw6FB
 validmind/unit_metrics/regression/sklearn/MeanSquaredError.py,sha256=7UQnDTTO7yRRyMe3Zac9ZyjEbbD8pW_8WnZwHdVB_8U,463
 validmind/unit_metrics/regression/sklearn/RSquaredScore.py,sha256=h9U5ndtnJfNNtKPZIo5n3KRp-m4akQcEo0t1iSwjVzY,420
 validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py,sha256=_5IQIU9jNfmTE4NLJvaRWXbudRGV2PS7nYF5e4fkSMY,556
-validmind/utils.py,sha256=
+validmind/utils.py,sha256=x3NxuTl0truq0mMhJ1d3z7jpvRifTRcwBf7cw4jkOmY,16209
 validmind/vm_models/__init__.py,sha256=lmWCD2u4tW6_AH39UnJ24sCcMUcsHbUttz7SaZfrh3s,1168
 validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
-validmind/vm_models/dataset/dataset.py,sha256=
+validmind/vm_models/dataset/dataset.py,sha256=VlR5Wp5pCoXY3U0C8AbevaySFGf0KJ3QIK3go5OEbog,21843
 validmind/vm_models/dataset/utils.py,sha256=IZDCM_FNaaMAWm9Vrvmf_h8ZzhkOffxa6SHLnHPP1TA,5157
 validmind/vm_models/figure.py,sha256=iSrvPcCG5sQrMkX1Fh6c5utRzaroh3bc6IlnGDOK_Eg,6651
 validmind/vm_models/model.py,sha256=n3XgTPHO4qeHiSxUq4Y8ajPYnxCe2Y_6X-02Ehb4s7M,6074
-validmind/vm_models/test/metric.py,sha256=
+validmind/vm_models/test/metric.py,sha256=R7Y-_fzBcIrkJw7-BeifQHMuHTV3HLDc8T3nS_lbCF8,3385
 validmind/vm_models/test/metric_result.py,sha256=Bak4GDrMlNq5NtgP5exwlPsKZgz3tWgtC6jZqtHjvqM,1987
 validmind/vm_models/test/output_template.py,sha256=njqCAMyLxwadkCWhACVskyL9-psTgmUysaeeirTVAX4,1500
 validmind/vm_models/test/result_summary.py,sha256=QJcIKJUeBf5wW3lyue6ctsi1jKSyoiAIfmjudGJiJtc,2028
-validmind/vm_models/test/result_wrapper.py,sha256=
+validmind/vm_models/test/result_wrapper.py,sha256=e0hN_oE31g64PU39zYes-PBgqd05TRXRUKF87VnjMUk,17654
 validmind/vm_models/test/test.py,sha256=434PqhPcbwfCmNjYVwHGMG-rViIatb9-1nmxkdZF8Xo,3104
-validmind/vm_models/test/threshold_test.py,sha256=
+validmind/vm_models/test/threshold_test.py,sha256=eRyPWTXsYD2jByR13QiY9ms_0zD1GA8zlmExlIw6rZQ,3615
 validmind/vm_models/test/threshold_test_result.py,sha256=EXP-g_e3NsnpkvNgYew030qVUoY6ZTHyuuFUXaq-BuM,1954
 validmind/vm_models/test_context.py,sha256=AN7-atBgOcD04MLVitCFJYooxF6_iNmvI2H4nkv32iw,9035
 validmind/vm_models/test_suite/runner.py,sha256=U93TauwLNEbAgJIzBZ9k9ip9NnlTt0gACHVgfO7J9BI,6754
 validmind/vm_models/test_suite/summary.py,sha256=GpqabqN_RcI5vbv4-A9YCLTpUOTKockp6oL1hi8IwVs,4541
 validmind/vm_models/test_suite/test.py,sha256=cIa-6_YkFp7Io4wBkr09aFNmljmUFSagV4JreLd1Q6Y,5285
 validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
-validmind-2.2.2.dist-info/LICENSE,sha256=
-validmind-2.2.2.dist-info/METADATA,sha256=
-validmind-2.2.2.dist-info/WHEEL,sha256=
-validmind-2.2.2.dist-info/entry_points.txt,sha256=
-validmind-2.2.2.dist-info/RECORD,,
+validmind-2.2.4.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
+validmind-2.2.4.dist-info/METADATA,sha256=Vydgse9o2MIx6yxKe4LzrHPa7sJCejaoW_HTjfBvRec,3911
+validmind-2.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+validmind-2.2.4.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
+validmind-2.2.4.dist-info/RECORD,,
{validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/LICENSE
File without changes
{validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/WHEEL
File without changes
{validmind-2.2.2.dist-info → validmind-2.2.4.dist-info}/entry_points.txt
File without changes