validmind 2.7.9__py3-none-any.whl → 2.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +58 -10
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +17 -73
- validmind/api_client.py +18 -1
- validmind/errors.py +1 -1
- validmind/models/r_model.py +5 -1
- validmind/tests/comparison.py +28 -2
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +4 -9
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +1 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +1 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +1 -1
- validmind/tests/model_validation/embeddings/utils.py +6 -9
- validmind/tests/model_validation/ragas/utils.py +8 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +3 -7
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
- validmind/tests/prompt_validation/ai_powered_test.py +5 -4
- validmind/tests/run.py +5 -1
- validmind/utils.py +35 -5
- validmind/vm_models/result/result.py +43 -2
- {validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/METADATA +5 -4
- {validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/RECORD +27 -32
- {validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/WHEEL +1 -1
- validmind/ai/test_result_description/config.yaml +0 -29
- validmind/ai/test_result_description/context.py +0 -73
- validmind/ai/test_result_description/image_processing.py +0 -124
- validmind/ai/test_result_description/system.jinja +0 -39
- validmind/ai/test_result_description/user.jinja +0 -30
- {validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/LICENSE +0 -0
- {validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/entry_points.txt +0 -0
validmind/__init__.py
CHANGED
@@ -30,8 +30,12 @@ vm.init(
 
 After you have pasted the code snippet into your development source code and executed the code, the Python Library API will register with ValidMind. You can now use the ValidMind Library to document and test your models, and to upload to the ValidMind Platform.
 """
+import threading
 import warnings
 
+import pkg_resources
+from IPython.display import HTML, display
+
 # Ignore Numba warnings. We are not requiring this package directly
 from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
 
@@ -51,30 +55,74 @@ from .client import ( # noqa: E402
 )
 from .tests.decorator import tags, tasks, test
 from .tests.run import print_env
+from .utils import is_notebook, parse_version
 from .vm_models.result import RawData
 
+__shown = False
+
+
+def show_warning(installed, running):
+    global __shown
+
+    if __shown:
+        return
+    __shown = True
+
+    message = (
+        f"⚠️ This kernel is running an older version of validmind ({running}) "
+        f"than the latest version installed on your system ({installed}).\n\n"
+        "You may need to restart the kernel if you are experiencing issues."
+    )
+    display(HTML(f"<div style='color: red;'>{message}</div>"))
+
+
+def check_version():
+    # get the installed vs running version of validmind
+    # to make sure we are using the latest installed version
+    # in case user has updated the package but forgot to restart the kernel
+    installed = pkg_resources.get_distribution("validmind").version
+    running = __version__
+
+    if parse_version(installed) > parse_version(running):
+        show_warning(installed, running)
+
+    # Schedule the next check for 5 minutes from now
+    timer = threading.Timer(300, check_version)
+    timer.daemon = True
+    timer.start()
+
+
+if is_notebook():
+    check_version()
+
 __all__ = [  # noqa
     "__version__",
-    #
-    "datasets",
-    "errors",
-    "get_test_suite",
+    # main library API
     "init",
+    "reload",
     "init_dataset",
     "init_model",
    "init_r_model",
     "preview_template",
-    "print_env",
-    "RawData",
-    "reload",
     "run_documentation_tests",
+    # log metric function (for direct/bulk/retroactive logging of metrics)
+    "log_metric",
+    # test suite functions (less common)
+    "get_test_suite",
     "run_test_suite",
+    # helper functions (for troubleshooting)
+    "print_env",
+    # decorators (for building tests
     "tags",
     "tasks",
     "test",
-
-    "
+    # raw data (for post-processing test results and building tests)
+    "RawData",
+    # submodules
+    "datasets",
+    "errors",
     "vm_models",
+    "tests",
     "unit_metrics",
-    "
+    "test_suites",
 ]
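The block above adds a background version check to `validmind/__init__.py`. A minimal standalone sketch of the comparison it performs (the `parse_version` helper mirrors the one added to `validmind/utils.py` in this release; the package name and versions below are only examples):

```python
# Sketch of the staleness check: compare the installed distribution version
# against the version the running kernel imported.
import pkg_resources


def parse_version(version: str) -> tuple:
    """Parse "major.minor.patch" into a tuple of ints for comparison."""
    return tuple(int(x) for x in version.split(".")[:3])


def kernel_is_stale(package: str, running_version: str) -> bool:
    """True when a newer version is installed than the one currently imported."""
    installed = pkg_resources.get_distribution(package).version
    return parse_version(installed) > parse_version(running_version)


if __name__ == "__main__":
    # e.g. a notebook kernel still running 2.7.9 after upgrading to 2.8.10
    print(kernel_is_stale("validmind", "2.7.9"))
```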
validmind/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2.7.9"
+__version__ = "2.8.10"
validmind/ai/test_descriptions.py
CHANGED
@@ -4,70 +4,24 @@
 
 import json
 import os
-import re
 from concurrent.futures import ThreadPoolExecutor
 from typing import List, Optional, Union
 
 import tiktoken
-from jinja2 import Template
 
 from ..client_config import client_config
 from ..logging import get_logger
 from ..utils import NumpyEncoder, md_to_html, test_id_to_name
 from ..vm_models.figure import Figure
 from ..vm_models.result import ResultTable
-from .utils import DescriptionFuture
+from .utils import DescriptionFuture
 
 __executor = ThreadPoolExecutor()
-__prompt = None
 
 logger = get_logger(__name__)
 
 
-def _load_prompt():
-    global __prompt
-
-    if not __prompt:
-        folder_path = os.path.join(os.path.dirname(__file__), "test_result_description")
-        with open(os.path.join(folder_path, "system.jinja"), "r") as f:
-            system_prompt = f.read()
-        with open(os.path.join(folder_path, "user.jinja"), "r") as f:
-            user_prompt = f.read()
-
-        __prompt = (Template(system_prompt), Template(user_prompt))
-
-    return __prompt
-
-
-def prompt_to_message(role, prompt):
-    if "[[IMAGE:" not in prompt:
-        return {"role": role, "content": prompt}
-
-    content = []
-
-    # Regex pattern to find [[IMAGE:<b64-data>]] markers
-    pattern = re.compile(r"\[\[IMAGE:(.*?)\]\]", re.DOTALL)
-
-    last_index = 0
-    for match in pattern.finditer(prompt):
-        # Text before the image marker
-        start, end = match.span()
-        if start > last_index:
-            content.append({"type": "text", "text": prompt[last_index:start]})
-
-        content.append({"type": "image_url", "image_url": {"url": match.group(1)}})
-
-        last_index = end
-
-    # Text after the last image
-    if last_index < len(prompt):
-        content.append({"type": "text", "text": prompt[last_index:]})
-
-    return {"role": role, "content": content}
-
-
 def _get_llm_global_context():
-
     # Get the context from the environment variable
     context = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_CONTEXT", "")
 
@@ -115,13 +69,13 @@ generate_description(
     title: Optional[str] = None,
 ):
     """Generate the description for the test results"""
+    from validmind.api_client import generate_test_result_description
+
     if not tables and not figures and not metric:
         raise ValueError(
             "No tables, unit metric or figures provided - cannot generate description"
         )
 
-    client, model = get_client_and_model()
-
     # get last part of test id
     test_name = title or test_id.split(".")[-1]
 
@@ -145,29 +99,18 @@ generate_description(
     else:
         summary = None
 
-
-
-
-
-
-
-
-
-
-
-
-
-    messages = [
-        prompt_to_message("system", system.render(input_data)),
-        prompt_to_message("user", user.render(input_data)),
-    ]
-    response = client.chat.completions.create(
-        model=model,
-        temperature=0.0,
-        messages=messages,
-    )
-
-    return response.choices[0].message.content
+    return generate_test_result_description(
+        {
+            "test_name": test_name,
+            "test_description": test_description,
+            "title": title,
+            "summary": _truncate_summary(summary, test_id),
+            "figures": [
+                figure._get_b64_url() for figure in ([] if tables else figures)
+            ],
+            "context": _get_llm_global_context(),
+        }
+    )["content"]
 
 
 def background_generate_description(
@@ -238,7 +181,8 @@ get_result_description(
     # Check the feature flag first, then the environment variable
     llm_descriptions_enabled = (
         client_config.can_generate_llm_test_descriptions()
-        and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1")
+        and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1").lower()
+        not in ["0", "false"]
     )
 
     # TODO: fix circular import
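Description generation now goes through the ValidMind API (`generate_test_result_description`) instead of local Jinja prompts, and the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` check became stricter. A small sketch of just the environment-variable half of that check (the real code also consults `client_config.can_generate_llm_test_descriptions()`):

```python
# Sketch: "0" and "false" (any casing) now disable LLM descriptions; any other
# value, or an unset variable, leaves them enabled.
import os


def llm_descriptions_enabled() -> bool:
    return os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1").lower() not in [
        "0",
        "false",
    ]


os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"] = "False"
print(llm_descriptions_enabled())  # False

del os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"]
print(llm_descriptions_enabled())  # True (default)
```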
validmind/api_client.py
CHANGED
@@ -194,6 +194,7 @@ def init(
|
|
194
194
|
api_host: Optional[str] = None,
|
195
195
|
model: Optional[str] = None,
|
196
196
|
monitoring: bool = False,
|
197
|
+
generate_descriptions: Optional[bool] = None,
|
197
198
|
):
|
198
199
|
"""
|
199
200
|
Initializes the API client instances and calls the /ping endpoint to ensure
|
@@ -209,7 +210,7 @@ def init(
|
|
209
210
|
api_secret (str, optional): The API secret. Defaults to None.
|
210
211
|
api_host (str, optional): The API host. Defaults to None.
|
211
212
|
monitoring (bool): The ongoing monitoring flag. Defaults to False.
|
212
|
-
|
213
|
+
generate_descriptions (bool): Whether to use GenAI to generate test result descriptions. Defaults to True.
|
213
214
|
Raises:
|
214
215
|
ValueError: If the API key and secret are not provided
|
215
216
|
"""
|
@@ -235,6 +236,9 @@ def init(
|
|
235
236
|
|
236
237
|
_monitoring = monitoring
|
237
238
|
|
239
|
+
if generate_descriptions is not None:
|
240
|
+
os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"] = str(generate_descriptions)
|
241
|
+
|
238
242
|
reload()
|
239
243
|
|
240
244
|
|
@@ -487,3 +491,16 @@ def get_ai_key() -> Dict[str, Any]:
|
|
487
491
|
raise_api_error(r.text)
|
488
492
|
|
489
493
|
return r.json()
|
494
|
+
|
495
|
+
|
496
|
+
def generate_test_result_description(test_result_data: Dict[str, Any]) -> str:
|
497
|
+
r = requests.post(
|
498
|
+
url=_get_url("ai/generate/test_result_description"),
|
499
|
+
headers=_get_api_headers(),
|
500
|
+
json=test_result_data,
|
501
|
+
)
|
502
|
+
|
503
|
+
if r.status_code != 200:
|
504
|
+
raise_api_error(r.text)
|
505
|
+
|
506
|
+
return r.json()
|
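The new `generate_descriptions` argument is forwarded into the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable during `init`. A hedged usage sketch; the host, keys, and model identifier below are placeholders, not real values:

```python
# Sketch of calling vm.init with the new flag; replace the placeholder
# credentials with your own before running.
import validmind as vm

vm.init(
    api_host="https://your-validmind-host/api/v1/tracking",  # placeholder
    api_key="<api-key>",
    api_secret="<api-secret>",
    model="<model-identifier>",
    generate_descriptions=False,  # skip GenAI-generated test result descriptions
)
```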
validmind/errors.py
CHANGED
@@ -228,7 +228,7 @@ class MissingRExtrasError(BaseError):
     def description(self, *args, **kwargs):
         return (
             self.message
-            or "
+            or "`rpy2` is required to use R models. Please install it with `pip install rpy2`"
         )
 
 
validmind/models/r_model.py
CHANGED
@@ -5,6 +5,7 @@
 import numpy as np
 import pandas as pd
 
+from validmind.errors import MissingRExtrasError
 from validmind.logging import get_logger
 from validmind.vm_models.model import VMModel
 
@@ -125,7 +126,10 @@ class RModel(VMModel):
         """
         Converts the predicted probabilities to classes
         """
-
+        try:
+            from rpy2.robjects import pandas2ri
+        except ImportError:
+            raise MissingRExtrasError()
 
         # Activate the pandas conversion for rpy2
         pandas2ri.activate()
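`RModel` now imports `rpy2` lazily and raises `MissingRExtrasError` when the dependency is absent. A standalone sketch of the same optional-dependency guard (a plain `ImportError` stands in for validmind's error class here):

```python
# Sketch of the lazy-import guard: rpy2 is only required when R functionality
# is actually used, and a clear install hint is raised otherwise.
def require_pandas2ri():
    try:
        from rpy2.robjects import pandas2ri
    except ImportError:
        raise ImportError(
            "`rpy2` is required to use R models. "
            "Please install it with `pip install rpy2`"
        )
    return pandas2ri


if __name__ == "__main__":
    try:
        pandas2ri = require_pandas2ri()
        pandas2ri.activate()  # enable pandas <-> R data frame conversion
    except ImportError as exc:
        print(exc)
```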
validmind/tests/comparison.py
CHANGED
@@ -15,7 +15,7 @@ from validmind.vm_models.figure import (
     is_png_image,
 )
 from validmind.vm_models.input import VMInput
-from validmind.vm_models.result import ResultTable, TestResult
+from validmind.vm_models.result import RawData, ResultTable, TestResult
 
 logger = get_logger(__name__)
 
@@ -312,6 +312,25 @@ get_comparison_test_configs(
     return test_configs
 
 
+def _combine_raw_data(results: List[TestResult]) -> RawData:
+    """Combine RawData objects"""
+    attribute_names = results[0].raw_data.__dict__.keys()
+
+    # check that all the raw data objects have the same attributes
+    for result in results:
+        if not isinstance(result.raw_data, RawData):
+            raise ValueError("All raw data objects must be of type RawData")
+        if result.raw_data.__dict__.keys() != attribute_names:
+            raise ValueError("RawData objects must have the same attributes")
+
+    return RawData(
+        **{
+            key: [getattr(result.raw_data, key) for result in results]
+            for key in attribute_names
+        }
+    )
+
+
 def combine_results(
     results: List[TestResult],
 ) -> Tuple[List[Any], Dict[str, List[Any]], Dict[str, List[Any]]]:
@@ -338,6 +357,9 @@
     # handle threshold tests (i.e. tests that have pass/fail bool status)
     if results[0].passed is not None:
         combined_outputs.append(all(result.passed for result in results))
+    # handle raw data (if any)
+    if results[0].raw_data:
+        combined_outputs.append(_combine_raw_data(results))
 
     # combine inputs and params
     combined_inputs = {}
@@ -359,4 +381,8 @@
     combined_inputs = _combine_dict_values(combined_inputs)
     combined_params = _combine_dict_values(combined_params)
 
-    return
+    return (
+        tuple(combined_outputs),
+        combined_inputs,
+        combined_params,
+    )
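Comparison runs now merge the per-result `RawData` objects attribute by attribute. A simplified sketch of that merge, using a stand-in `RawData` class rather than validmind's:

```python
# Sketch: each attribute of the combined object becomes a list holding that
# attribute's value from every individual result, in order.
class RawData:  # stand-in for validmind.RawData
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)


def combine_raw_data(items):
    keys = items[0].__dict__.keys()
    if any(item.__dict__.keys() != keys for item in items):
        raise ValueError("RawData objects must have the same attributes")
    return RawData(**{key: [getattr(item, key) for item in items] for key in keys})


a = RawData(scores=[0.9, 0.8])
b = RawData(scores=[0.7, 0.6])
print(combine_raw_data([a, b]).scores)  # [[0.9, 0.8], [0.7, 0.6]]
```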
@@ -52,16 +52,10 @@ def FeatureTargetCorrelationPlot(dataset, fig_height=600):
|
|
52
52
|
- Not apt for models that employ complex feature interactions, like Decision Trees or Neural Networks, as the test
|
53
53
|
may not accurately reflect their importance.
|
54
54
|
"""
|
55
|
-
|
56
|
-
# Filter DataFrame based on features and target_column
|
57
55
|
df = dataset.df[dataset.feature_columns + [dataset.target_column]]
|
58
56
|
|
59
|
-
fig = _visualize_feature_target_correlation(
|
60
|
-
|
61
|
-
correlations = (
|
62
|
-
df.corr(numeric_only=True)[dataset.target_column]
|
63
|
-
.drop(dataset.target_column)
|
64
|
-
.to_frame()
|
57
|
+
fig, correlations = _visualize_feature_target_correlation(
|
58
|
+
df, dataset.target_column, fig_height
|
65
59
|
)
|
66
60
|
|
67
61
|
return fig, RawData(correlation_data=correlations)
|
@@ -100,4 +94,5 @@ def _visualize_feature_target_correlation(df, target_column, fig_height):
|
|
100
94
|
yaxis_title="",
|
101
95
|
height=fig_height, # Adjust the height value as needed
|
102
96
|
)
|
103
|
-
|
97
|
+
|
98
|
+
return fig, correlations
|
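The correlation computation now lives inside `_visualize_feature_target_correlation`, which returns both the figure and the correlations. A toy sketch of the underlying pandas computation (the same `df.corr(...)` pattern visible in the removed lines):

```python
# Sketch: correlation of each numeric feature with the target column.
import pandas as pd

df = pd.DataFrame(
    {
        "feature_a": [1, 2, 3, 4, 5],
        "feature_b": [5, 3, 4, 1, 2],
        "target": [1.1, 1.9, 3.2, 3.8, 5.0],
    }
)

correlations = df.corr(numeric_only=True)["target"].drop("target").to_frame()
print(correlations)
```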
@@ -118,4 +118,4 @@ def ContextualRecall(dataset, model):
|
|
118
118
|
# Create a DataFrame from all collected statistics
|
119
119
|
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
|
120
120
|
|
121
|
-
return (result_df, *
|
121
|
+
return (result_df, *figures, RawData(contextual_recall_scores=metrics_df))
|
@@ -117,4 +117,4 @@ def MeteorScore(dataset, model):
|
|
117
117
|
# Create a DataFrame from all collected statistics
|
118
118
|
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
|
119
119
|
|
120
|
-
return (result_df, *
|
120
|
+
return (result_df, *figures, RawData(meteor_scores=metrics_df))
|
validmind/tests/model_validation/embeddings/utils.py
CHANGED
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
 import numpy as np
-import pandas as pd
 import plotly.express as px
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -18,14 +17,12 @@ create_stability_analysis_result(
         original_embeddings, perturbed_embeddings
     ).diagonal()
 
-    #
-    raw_data =
-
-
-
-
-    }
-    )
+    # Store raw data in a dictionary
+    raw_data = {
+        "original_embeddings": original_embeddings,
+        "perturbed_embeddings": perturbed_embeddings,
+        "similarities": similarities,
+    }
 
     mean = np.mean(similarities)
     passed = mean > mean_similarity_threshold
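The stability helpers now keep the raw inputs in a plain dictionary alongside the similarity scores. A toy sketch of the similarity computation the result is built on (the threshold value is illustrative):

```python
# Sketch: compare each original embedding with its perturbed counterpart via
# the diagonal of the pairwise cosine-similarity matrix.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

original_embeddings = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
perturbed_embeddings = np.array([[0.9, 0.1], [0.1, 0.9], [1.0, 0.8]])

similarities = cosine_similarity(original_embeddings, perturbed_embeddings).diagonal()

mean_similarity_threshold = 0.7  # illustrative threshold
passed = np.mean(similarities) > mean_similarity_threshold
print(similarities, passed)
```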
validmind/tests/model_validation/ragas/utils.py
CHANGED
@@ -4,24 +4,25 @@
 
 import os
 
-from validmind.ai.utils import get_client_and_model
-from validmind.client_config import client_config
+from validmind.ai.utils import get_client_and_model, is_configured
 
 EMBEDDINGS_MODEL = "text-embedding-3-small"
 
 
 def get_ragas_config():
-    if not client_config.can_generate_llm_test_descriptions():
-        raise ValueError(
-            "LLM based descriptions are not enabled in the current configuration."
-        )
-
     # import here since its an optional dependency
     try:
         from langchain_openai import ChatOpenAI, OpenAIEmbeddings
     except ImportError:
         raise ImportError("Please run `pip install validmind[llm]` to use LLM tests")
 
+    if not is_configured():
+        raise ValueError(
+            "LLM is not configured. Please set an `OPENAI_API_KEY` environment variable "
+            "or ensure that you are connected to the ValidMind API and ValidMind AI is "
+            "enabled for your account."
+        )
+
     client, model = get_client_and_model()
     os.environ["OPENAI_API_BASE"] = str(client.base_url)
 
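`get_ragas_config` now imports the optional `langchain_openai` dependency first and only then checks whether an LLM is available via `is_configured()`. A simplified sketch of that kind of guard; the real helper also accounts for ValidMind-hosted AI, so this only covers the `OPENAI_API_KEY` path named in the error message:

```python
# Simplified configuration guard (assumption: an OPENAI_API_KEY in the
# environment counts as "configured"; validmind's is_configured() does more).
import os


def llm_is_configured() -> bool:
    return bool(os.getenv("OPENAI_API_KEY"))


if not llm_is_configured():
    raise ValueError(
        "LLM is not configured. Please set an `OPENAI_API_KEY` environment variable "
        "or ensure that you are connected to the ValidMind API and ValidMind AI is "
        "enabled for your account."
    )
```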
@@ -62,18 +62,14 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
|
|
62
62
|
lb = LabelBinarizer()
|
63
63
|
lb.fit(y_true)
|
64
64
|
|
65
|
-
y_true_binarized = lb.transform(y_true)
|
66
|
-
y_score_binarized = lb.transform(dataset.y_pred(model))
|
67
|
-
|
68
65
|
roc_auc = roc_auc_score(
|
69
|
-
y_true=
|
70
|
-
y_score=
|
66
|
+
y_true=lb.transform(y_true),
|
67
|
+
y_score=lb.transform(dataset.y_pred(model)),
|
71
68
|
average="macro",
|
72
69
|
)
|
73
70
|
|
74
71
|
else:
|
75
|
-
|
76
|
-
roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
|
72
|
+
roc_auc = roc_auc_score(y_true=y_true, y_score=dataset.y_prob(model))
|
77
73
|
|
78
74
|
return [
|
79
75
|
{
|
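The multiclass branch now binarizes labels and predictions inline before computing a macro-averaged ROC AUC. A self-contained sketch with toy labels:

```python
# Sketch of the multiclass macro ROC AUC computation after the refactor.
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

y_true = [0, 1, 2, 1, 0, 2]
y_pred = [0, 1, 2, 2, 0, 2]

lb = LabelBinarizer()
lb.fit(y_true)

roc_auc = roc_auc_score(
    y_true=lb.transform(y_true),
    y_score=lb.transform(y_pred),
    average="macro",
)
print(roc_auc)
```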
@@ -45,17 +45,17 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
|
|
45
45
|
- A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
|
46
46
|
overfitting.
|
47
47
|
"""
|
48
|
+
y_true = dataset.y
|
49
|
+
y_pred = dataset.y_pred(model)
|
50
|
+
|
48
51
|
return [
|
49
52
|
{
|
50
53
|
"Independent Variables": dataset.feature_columns,
|
51
|
-
"R-Squared": r2_score(
|
54
|
+
"R-Squared": r2_score(y_true, y_pred),
|
52
55
|
"Adjusted R-Squared": adj_r2_score(
|
53
|
-
dataset.
|
54
|
-
dataset.y_pred(model),
|
55
|
-
len(dataset.y),
|
56
|
-
len(dataset.feature_columns),
|
56
|
+
y_true, y_pred, len(y_true), len(dataset.feature_columns)
|
57
57
|
),
|
58
|
-
"MSE": mean_squared_error(
|
59
|
-
"RMSE": mean_squared_error(
|
58
|
+
"MSE": mean_squared_error(y_true, y_pred, squared=True),
|
59
|
+
"RMSE": mean_squared_error(y_true, y_pred, squared=False),
|
60
60
|
}
|
61
61
|
]
|
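The summary now computes every metric from a single `y_true`/`y_pred` pair, with MSE and RMSE distinguished only by the `squared` flag (still available in the pinned scikit-learn `<1.6.0`). A sketch with toy data; `adj_r2_score` below is a local stand-in for validmind's helper, written from the usual adjusted R-squared formula:

```python
# Sketch of the refactored metric block using toy regression outputs.
from sklearn.metrics import mean_squared_error, r2_score

y_true = [3.0, 2.5, 4.0, 5.1, 6.2]
y_pred = [2.8, 2.7, 3.9, 5.0, 6.0]
n_features = 2  # illustrative feature count


def adj_r2_score(y_true, y_pred, n_obs, n_features):
    # Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)


summary = {
    "R-Squared": r2_score(y_true, y_pred),
    "Adjusted R-Squared": adj_r2_score(y_true, y_pred, len(y_true), n_features),
    "MSE": mean_squared_error(y_true, y_pred, squared=True),
    "RMSE": mean_squared_error(y_true, y_pred, squared=False),
}
print(summary)
```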
validmind/tests/prompt_validation/ai_powered_test.py
CHANGED
@@ -4,8 +4,7 @@
 
 import re
 
-from validmind.ai.utils import get_client_and_model
-from validmind.client_config import client_config
+from validmind.ai.utils import get_client_and_model, is_configured
 
 missing_prompt_message = """
 Cannot run prompt validation tests on a model with no prompt.
@@ -25,9 +24,11 @@ call_model(
     system_prompt: str, user_prompt: str, temperature: float = 0.0, seed: int = 42
 ):
     """Call LLM with the given prompts and return the response"""
-    if not
+    if not is_configured():
         raise ValueError(
-            "LLM
+            "LLM is not configured. Please set an `OPENAI_API_KEY` environment variable "
+            "or ensure that you are connected to the ValidMind API and ValidMind AI is "
+            "enabled for your account."
         )
 
     client, model = get_client_and_model()
validmind/tests/run.py
CHANGED
@@ -256,7 +256,7 @@ def _run_comparison_test(
|
|
256
256
|
combined_outputs, combined_inputs, combined_params = combine_results(results)
|
257
257
|
|
258
258
|
return build_test_result(
|
259
|
-
outputs=
|
259
|
+
outputs=combined_outputs,
|
260
260
|
test_id=test_id,
|
261
261
|
test_doc=test_doc,
|
262
262
|
inputs=combined_inputs,
|
@@ -400,5 +400,9 @@ def run_test( # noqa: C901
|
|
400
400
|
|
401
401
|
|
402
402
|
def print_env():
|
403
|
+
"""Prints a log of the running environment for debugging.
|
404
|
+
|
405
|
+
Output includes: ValidMind Library version, operating system details, installed dependencies, and the ISO 8601 timestamp at log creation.
|
406
|
+
"""
|
403
407
|
e = _get_run_metadata()
|
404
408
|
pprint.pp(e)
|
validmind/utils.py
CHANGED
@@ -60,6 +60,19 @@ pylab.rcParams.update(params)
 logger = get_logger(__name__)
 
 
+def parse_version(version: str) -> tuple[int, ...]:
+    """
+    Parse a semver version string into a tuple of major, minor, patch integers
+
+    Args:
+        version (str): The semantic version string to parse
+
+    Returns:
+        tuple[int, ...]: A tuple of major, minor, patch integers
+    """
+    return tuple(int(x) for x in version.split(".")[:3])
+
+
 def is_notebook() -> bool:
     """
     Checks if the code is running in a Jupyter notebook or IPython shell
@@ -110,6 +123,7 @@ class NumpyEncoder(json.JSONEncoder):
             self.is_numpy_ndarray: lambda obj: obj.tolist(),
             self.is_numpy_bool: lambda obj: bool(obj),
             self.is_pandas_timestamp: lambda obj: str(obj),
+            self.is_numpy_datetime64: lambda obj: str(obj),
             self.is_set: lambda obj: list(obj),
             self.is_quantlib_date: lambda obj: obj.ISO(),
             self.is_generic_object: self.handle_generic_object,
@@ -142,6 +156,9 @@ class NumpyEncoder(json.JSONEncoder):
     def is_pandas_timestamp(self, obj):
         return isinstance(obj, pd.Timestamp)
 
+    def is_numpy_datetime64(self, obj):
+        return isinstance(obj, np.datetime64)
+
     def is_set(self, obj):
         return isinstance(obj, set)
 
@@ -152,11 +169,12 @@ class NumpyEncoder(json.JSONEncoder):
         return isinstance(obj, object)
 
     def handle_generic_object(self, obj):
-
-        obj
-
-
-
+        try:
+            if hasattr(obj, "__str__"):
+                return obj.__str__()
+            return obj.__class__.__name__
+        except Exception:
+            return str(type(obj).__name__)
 
     def encode(self, obj):
         obj = nan_to_none(obj)
@@ -177,6 +195,18 @@ class HumanReadableEncoder(NumpyEncoder):
             else obj.tolist()
         )
 
+    def default(self, obj):
+        if self.is_dataframe(obj):
+            return {
+                "type": str(type(obj)),
+                "preview": obj.head(5).to_dict(orient="list"),
+                "shape": f"{obj.shape[0]} rows x {obj.shape[1]} columns",
+            }
+        return super().default(obj)
+
+    def is_dataframe(self, obj):
+        return isinstance(obj, pd.DataFrame)
+
 
 def get_full_typename(o: Any) -> Any:
     """We determine types based on type names so we don't have to import
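The encoder changes add `numpy.datetime64` stringification and, for `HumanReadableEncoder`, a compact preview of `pandas.DataFrame` values. A standalone sketch that mirrors just those two additions, not the full validmind class:

```python
# Minimal encoder mirroring the two new behaviours shown in the diff above.
import json

import numpy as np
import pandas as pd


class SketchEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.datetime64):
            return str(obj)
        if isinstance(obj, pd.DataFrame):
            return {
                "type": str(type(obj)),
                "preview": obj.head(5).to_dict(orient="list"),
                "shape": f"{obj.shape[0]} rows x {obj.shape[1]} columns",
            }
        return super().default(obj)


payload = {
    "timestamp": np.datetime64("2025-01-01T00:00:00"),
    "data": pd.DataFrame({"a": range(10), "b": range(10)}),
}
print(json.dumps(payload, cls=SketchEncoder, indent=2))
```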
validmind/vm_models/result/result.py
CHANGED
@@ -171,6 +171,7 @@ class TestResult(Result):
     metadata: Optional[Dict[str, Any]] = None
     _was_description_generated: bool = False
     _unsafe: bool = False
+    _client_config_cache: Optional[Any] = None
 
     def __post_init__(self):
         if self.ref_id is None:
@@ -329,13 +330,50 @@
 
         return VBox(widgets)
 
+    @classmethod
+    def _get_client_config(cls):
+        """Get the client config, loading it if not cached"""
+        if cls._client_config_cache is None:
+            api_client.reload()
+            cls._client_config_cache = api_client.client_config
+
+        if cls._client_config_cache is None:
+            raise ValueError(
+                "Failed to load client config: api_client.client_config is None"
+            )
+
+        if not hasattr(cls._client_config_cache, "documentation_template"):
+            raise ValueError(
+                "Invalid client config: missing documentation_template"
+            )
+
+        return cls._client_config_cache
+
+    def check_result_id_exist(self):
+        """Check if the result_id exists in any test block across all sections"""
+        client_config = self._get_client_config()
+
+        # Iterate through all sections
+        for section in client_config.documentation_template["sections"]:
+            blocks = section.get("contents", [])
+            # Check each block in the section
+            for block in blocks:
+                if (
+                    block.get("content_type") == "test"
+                    and block.get("content_id") == self.result_id
+                ):
+                    return
+
+        logger.info(
+            f"Test driven block with result_id {self.result_id} does not exist in model's document"
+        )
+
     def _validate_section_id_for_block(
         self, section_id: str, position: Union[int, None] = None
     ):
         """Validate the section_id exits on the template before logging"""
-
+        client_config = self._get_client_config()
         found = False
-        client_config = api_client.client_config
 
         for section in client_config.documentation_template["sections"]:
             if section["id"] == section_id:
@@ -440,6 +478,9 @@ class TestResult(Result):
             unsafe (bool): If True, log the result even if it contains sensitive data
                 i.e. raw data from input datasets
         """
+
+        self.check_result_id_exist()
+
         if not unsafe:
             for table in self.tables or []:
                 check_for_sensitive_data(table.data, self._get_flat_inputs())
{validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: validmind
-Version: 2.7.9
+Version: 2.8.10
 Summary: ValidMind Library
 License: Commercial License
 Author: Andres Rodriguez
@@ -15,8 +15,8 @@ Provides-Extra: all
 Provides-Extra: huggingface
 Provides-Extra: llm
 Provides-Extra: pytorch
-Provides-Extra: r-support
 Requires-Dist: aiohttp[speedups]
+Requires-Dist: anywidget (>=0.9.13,<0.10.0)
 Requires-Dist: arch
 Requires-Dist: bert-score (>=0.3.13)
 Requires-Dist: catboost
@@ -42,7 +42,6 @@ Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
 Requires-Dist: python-dotenv
 Requires-Dist: ragas (>=0.2.3) ; extra == "all" or extra == "llm"
 Requires-Dist: rouge (>=1)
-Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
 Requires-Dist: scikit-learn (<1.6.0)
 Requires-Dist: scipy
 Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
@@ -53,6 +52,7 @@ Requires-Dist: shap (==0.44.1)
 Requires-Dist: statsmodels
 Requires-Dist: tabulate (>=0.8.9,<0.9.0)
 Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
+Requires-Dist: tiktoken
 Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
 Requires-Dist: tqdm
 Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
@@ -131,6 +131,7 @@ The ValidMind Library has optional dependencies that can be installed separately
 - **R Models**: To use R models with the ValidMind Library, install the `r` extra:
 
   ```bash
-  pip install validmind
+  pip install validmind
+  pip install rpy2
   ```
 
{validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/RECORD
CHANGED
@@ -1,13 +1,8 @@
-validmind/__init__.py,sha256=
-validmind/__version__.py,sha256=
-validmind/ai/test_descriptions.py,sha256=
-validmind/ai/test_result_description/config.yaml,sha256=E1gPd-uv-MzdrWZA_rP6LSk8pVmkYijx6v78hZ8ceL0,787
-validmind/ai/test_result_description/context.py,sha256=ebKulFMpXTDLqd6lOHAsG200GmLNnhnu7sMDnbo2Dhc,2339
-validmind/ai/test_result_description/image_processing.py,sha256=JNaO1zyM9293WWuyzUp1meQQbHuut0XN4kKUGzQTwYY,4061
-validmind/ai/test_result_description/system.jinja,sha256=BjMvZCC3UXEH8p3VPpnHtGjhnqnbNcEG2_kYZ_QZrgg,2358
-validmind/ai/test_result_description/user.jinja,sha256=CmqPQQiqdXjxtq47wFCZ-IT5csliWsRVM04psKxzXc4,689
+validmind/__init__.py,sha256=4ukEysjRwf6X2RcK8_OVY-z5fTjnM4GQIiVERFyzRPY,4194
+validmind/__version__.py,sha256=93aDjSnP93wggEKIISA3t5wll8Pc68HgFa7UDIxqdwo,23
+validmind/ai/test_descriptions.py,sha256=VQ2LaWXvSEQZ8569TN9DYDfgDLlMv96wIjUb9MGJlHk,6882
 validmind/ai/utils.py,sha256=YHqXtmACjcL5imDS9_nzmz8MhQJzK3VybcDXMbj1SbQ,4168
-validmind/api_client.py,sha256=
+validmind/api_client.py,sha256=II9dggnuZwagZQaTHMqCkfS9fKrMCZO0cF6at0kBO2w,15456
 validmind/client.py,sha256=lOv4lSZGDOUMxOa2FpNgAiT_GaEolffZTfvljewhl2I,18595
 validmind/client_config.py,sha256=CzbeS9GZrgAdx-6DssRK5XwEMOcujQVRgji6EtAzxYI,1358
 validmind/datasets/__init__.py,sha256=c0hQZN_6GrUEJxdFHdQaEsQrSYNABG84ZCY0H-PzOZk,260
@@ -64,7 +59,7 @@ validmind/datasets/regression/models/fred_loan_rates_model_2.pkl,sha256=J1ukMdeF
 validmind/datasets/regression/models/fred_loan_rates_model_3.pkl,sha256=IogZPcUQc1F_v11fR6KWT-nRt5JzvK5f7p4Hrw7vLps,40063
 validmind/datasets/regression/models/fred_loan_rates_model_4.pkl,sha256=cSxhpcrI4hCbxCwZwE2-nr7KObbWpDii3NzpECoXmmM,48292
 validmind/datasets/regression/models/fred_loan_rates_model_5.pkl,sha256=FkNLHq9xkPMbYks_vyMjFL371mw9SQYbP1iX9lY4Ljo,60343
-validmind/errors.py,sha256=
+validmind/errors.py,sha256=_zM-CABfIGz6hirW4nuukyOgYkFCZ8xh55Jn5DSjd6g,8074
 validmind/html_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 validmind/html_templates/content_blocks.py,sha256=vFMRS4Ogq4RZq88WzG3teNEOq3U4OLgLDzD3lBx4h-g,4050
 validmind/input_registry.py,sha256=8C_mrhgLT72hwbt_lo3ZwXb5NCyIcSuCQI1HdJ3bK2A,1042
@@ -76,7 +71,7 @@ validmind/models/huggingface.py,sha256=DMHekLpWi6c4N0svh-3G0NYYwzxPXOvqiU95M4Qff
 validmind/models/metadata.py,sha256=PMcdYuACkSPvuG8io5BhZeMwclQr_q79mXbvd1SC-7I,1665
 validmind/models/pipeline.py,sha256=nSskKWxaS4SGmx_B0IAvS5ogDZyh6tdx_aUkyxSXt88,2051
 validmind/models/pytorch.py,sha256=aAEUWtISwLh-PMvHkcLwBEbBStAByt4J-NpK-Ndv38E,1826
-validmind/models/r_model.py,sha256=
+validmind/models/r_model.py,sha256=TPUwPmxz3cNzJ1bAA5vz6P9xS6deVcLTuIO1e7rD1vY,7306
 validmind/models/sklearn.py,sha256=lOCJlP2wvd5IJHtBS1XG9FXrtIvO_f8xm2Qp1UdsiBw,2406
 validmind/template.py,sha256=-j7UmM9v7I_VIZltWrmX5scbeDTbRDrR7hTQUzy6AVg,7307
 validmind/test_suites/__init__.py,sha256=wC_ZgVykFZAOrs1LTM7bE8r7mTSjxF54F9amUdT3nT4,6953
@@ -95,7 +90,7 @@ validmind/test_suites/time_series.py,sha256=3hzWc9gXHBf8aMecD-1YYGFS5RI1o8A-eor9
 validmind/tests/__init__.py,sha256=U6wUS7R8lYFjwUZmAkG7gC8Av9Z4TTDZS7uWZqbzxVM,1665
 validmind/tests/__types__.py,sha256=3if3CVI-YHWQpX0t_8WYh1a6dd01us0oP7r2ZsX2NX0,12589
 validmind/tests/_store.py,sha256=p1qBYOkdphd1aOWwnL6ybPfz3qSWJs2engLv-zFVl5A,2368
-validmind/tests/comparison.py,sha256=
+validmind/tests/comparison.py,sha256=7LJFr_6sVVOAxLRX1LYylJ8oZF0UdrVV7tFfWIUfcWI,13336
 validmind/tests/data_validation/ACFandPACFPlot.py,sha256=Xo_xildBWlKJSbycH-IqXh5PkpBtEFym5WN_m9VKDwU,4276
 validmind/tests/data_validation/ADF.py,sha256=UHIYCJuqQYJTtOvdzj6ALJcLH8kCprr0PlfESIx_G_E,3969
 validmind/tests/data_validation/AutoAR.py,sha256=QQGezpGkYS0m0isT3BDhBAGMh8CS8ZU2HsU3G4p9lY0,5238
@@ -111,7 +106,7 @@ validmind/tests/data_validation/DescriptiveStatistics.py,sha256=l6Ne4y5A9HC2vlPs
 validmind/tests/data_validation/DickeyFullerGLS.py,sha256=8zR7I77082RfYOyOlmYX7OLURXJ8_G1Yh9Esb27FzFQ,4193
 validmind/tests/data_validation/Duplicates.py,sha256=HAEHRFwFZovJU-wBWea0KJREsJCd9R8jarazxJ3ZqI8,3219
 validmind/tests/data_validation/EngleGrangerCoint.py,sha256=kNBbxLYweF8qTF5JVRzcyXq3aKLhkN_1iv3mwwskTBU,4503
-validmind/tests/data_validation/FeatureTargetCorrelationPlot.py,sha256=
+validmind/tests/data_validation/FeatureTargetCorrelationPlot.py,sha256=IEQYOakooUJRLe1CghVk0u4RJdLZ3IA-nnYveqgKJVE,4281
 validmind/tests/data_validation/HighCardinality.py,sha256=Li6xa46crnmdx9XliuHbDDVH4_KzcUT4T7Z16cRYQnY,3546
 validmind/tests/data_validation/HighPearsonCorrelation.py,sha256=f8AtgstLTY5msFfDkCEe3hScHmVr8A2rL4OgdJSdkYQ,3779
 validmind/tests/data_validation/IQROutliersBarPlot.py,sha256=BrhqjiV5EpQAgPI_6GUdF5NLA3O5GbJk50dXbi-9G0U,5182
@@ -172,9 +167,9 @@ validmind/tests/load.py,sha256=UpMHEMolnwQ8z3EYNJvNj186H3JUgQvz_eRCBrngtBo,11003
 validmind/tests/model_validation/BertScore.py,sha256=R6jnQ9cDKbHpaJFtf2wTHkWtKX_DCRzNm6NkOmgGLJ8,5757
 validmind/tests/model_validation/BleuScore.py,sha256=xFPkmxVbM4ST3Bvi5IH9Sh_BJU_Civ-Slh66AoZgRec,5134
 validmind/tests/model_validation/ClusterSizeDistribution.py,sha256=mZcLjbHjGJ4ltt5juyuGRC5Fm7oqDB6ZPBxCvd1-h_k,3284
-validmind/tests/model_validation/ContextualRecall.py,sha256=
+validmind/tests/model_validation/ContextualRecall.py,sha256=0W1xkiDff09ql7x9N0JW4qafCKyLdCttbbybDTJE9pQ,5256
 validmind/tests/model_validation/FeaturesAUC.py,sha256=LWURhngE04h3r_UA0JQIyoHMYsrJTRQHHlmK2UnR0P4,4010
-validmind/tests/model_validation/MeteorScore.py,sha256=
+validmind/tests/model_validation/MeteorScore.py,sha256=PXLfyZ1tFDoYfZLniAxOpfSRJDFvGdaUm5LKH_5XotA,5246
 validmind/tests/model_validation/ModelMetadata.py,sha256=UiDANMTqAy0DURnnTzImYNS-3Z8sE4yFjg1c2S5YFS8,2521
 validmind/tests/model_validation/ModelPredictionResiduals.py,sha256=mZmE0RWTUHigJJ3EvrzWcHjb5nvePYGH2PvZTQl2ZNc,3524
 validmind/tests/model_validation/RegardScore.py,sha256=cE3BmI1SQ3rlRbrREio8mooaG2BQ0BC_AHMFexbJ3Xk,5562
@@ -184,7 +179,7 @@ validmind/tests/model_validation/TimeSeriesPredictionWithCI.py,sha256=JanMDaf-76
 validmind/tests/model_validation/TimeSeriesPredictionsPlot.py,sha256=zoxONIl_a_mfOLuxnfVEMNQ3jL72pp5J4BDAQP5e-P0,2280
 validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py,sha256=JIyER-vwXnQyHdxmpCULYm4iMkB_LaLHHQuzyygumbw,4000
 validmind/tests/model_validation/TokenDisparity.py,sha256=_gn-LSghJa8hBtCKxfNKw7rAC8h1uyrFhcX8BIaPoUs,4422
-validmind/tests/model_validation/ToxicityScore.py,sha256=
+validmind/tests/model_validation/ToxicityScore.py,sha256=s-EPOn6Apo7dD_bw0wcmTez3P-lv6hfLV61UHuLMsgg,5465
 validmind/tests/model_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 validmind/tests/model_validation/embeddings/ClusterDistribution.py,sha256=q0v88QINg5BQWuFbQrvVb_lP-lLEQt_h1f3Af4TX6Bc,3056
 validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py,sha256=Xtycolawy2Z7hhShfdlLpYvpc1lgQrISCL75ltr8s1g,5138
@@ -196,11 +191,11 @@ validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py,sha25
 validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py,sha256=TvB2yi5C1OXtCgiXy4z9CBGbUiNEwEVzgG69gmpb3hQ,3393
 validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py,sha256=lF4QC5Ueihxt-ON1GyUTwVJ9_qPizbtvt9OoPt39Gi4,3919
 validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py,sha256=aGsE1CfpfnNID631H15hnwAXtiDoy-HooIBMNuu8am4,4161
-validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py,sha256=
-validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py,sha256=
-validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=
+validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py,sha256=pDN9aAjPNsHwo94biDtGBBJk7Daqv9UELIu8lCA_ZAs,5764
+validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py,sha256=SsFgUYPG8M8RbpcQKZzlx6aG4dG9yWK1zW3fgK52OUc,4652
+validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=NDQeTt2Y3r241y3pUYhnkKtAhkOJZmE-8O6xxcW55dI,5760
 validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py,sha256=roCTA0QXU8GJoQIQ9X-FKeQ2LjBGwyH5IJTdZFnreRU,4645
-validmind/tests/model_validation/embeddings/utils.py,sha256=
+validmind/tests/model_validation/embeddings/utils.py,sha256=Hr8jpVB0YfaOEYsO_tiwhU1UgXoJFHHlRqFcHDNXHoU,1896
 validmind/tests/model_validation/ragas/AnswerCorrectness.py,sha256=P1-sAWe4KGwgwJ0F44BY9Nd1wvCmaftG6tRZwsi9ero,5496
 validmind/tests/model_validation/ragas/AspectCritic.py,sha256=KsETxsVxSYAxAMbxyssL-c1AU0qUOFMz7I5D2pu-fY0,6936
 validmind/tests/model_validation/ragas/ContextEntityRecall.py,sha256=dQCBtiq4BF-C2covBaNxU0Ok_FYTDEN3M0U6ynwsAq0,5328
@@ -211,7 +206,7 @@ validmind/tests/model_validation/ragas/Faithfulness.py,sha256=SZCrgzfpcatfva9sYL
 validmind/tests/model_validation/ragas/NoiseSensitivity.py,sha256=oAcwDhXDEYLT0q3e8quHuYWdp7OJg9ycBvu05k6u6JI,6506
 validmind/tests/model_validation/ragas/ResponseRelevancy.py,sha256=vsfhXy-DBtIWEGsJZeR9xcE2V6-eLR2ui-qO1-96BLQ,5588
 validmind/tests/model_validation/ragas/SemanticSimilarity.py,sha256=wjEkCikFLweTdz3Tg5CzikWqusdSsWMwaD_4yntBCb4,4820
-validmind/tests/model_validation/ragas/utils.py,sha256=
+validmind/tests/model_validation/ragas/utils.py,sha256=EV3aHvWMOgVs6EIRS0nfOa7BRwpF-eSqlOQfsoZFTC8,3429
 validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py,sha256=5kk-etKeA7jfo6twQ4JVPEuNvWh0TBhhXUQL7SkrrWM,2858
 validmind/tests/model_validation/sklearn/AdjustedRandIndex.py,sha256=NCUM80CIFrV4Qm0P0wxMdf20y-BwLnPEJxOiPtv1eGk,2706
 validmind/tests/model_validation/sklearn/CalibrationCurve.py,sha256=-chYMKediopXV7YVNuaGhtb-RpIdgEsEpL1iy8aAgp4,4159
@@ -228,7 +223,7 @@ validmind/tests/model_validation/sklearn/HyperParametersTuning.py,sha256=Z7ibpCy
 validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py,sha256=wisUO4461arN6YPQ2lazbgnI2cOVjbSiDObt3T8IHYY,5279
 validmind/tests/model_validation/sklearn/MinimumAccuracy.py,sha256=2FVtoEMUJJYUxDW6WwC5agAojtt7FUnO7nwcVaqPKao,2773
 validmind/tests/model_validation/sklearn/MinimumF1Score.py,sha256=CBOGD_wCqcHgMbKfp5TGTc1H8mJoG3RwMRSOUFHVGDc,3069
-validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py,sha256=
+validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py,sha256=rNPD-qxdFKuUs8SWa0yIFIjjt8svPeXWNKWQrLy8uJc,3497
 validmind/tests/model_validation/sklearn/ModelParameters.py,sha256=CF3cZGJLxiABnf1CQ_u_iX_ylgvpElH3jF2DBXbXZJY,3060
 validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py,sha256=wDxGUXgfzLA80wfjoRz7CzHO8NiQfuJyxIfuVFOuLYA,4658
 validmind/tests/model_validation/sklearn/OverfitDiagnosis.py,sha256=tpPf4lL_mfpMne6ei80QEYUZd1ual8w4SbXTKS_0nfY,9960
@@ -261,7 +256,7 @@ validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py,sh
 validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py,sha256=XpzPACbdKkjP5egxESDUYb7aCZ8_VmJpMHCy3joEHmA,3648
 validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py,sha256=4khg8CsgMWAQZVDOqky7_gcPqDS19d19n0FTTbh1cGM,3127
 validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py,sha256=UbtvxJho7yISQdsNcHOjUZwBASTY6TQmIpwLddYEsYo,4704
-validmind/tests/model_validation/statsmodels/RegressionModelSummary.py,sha256=
+validmind/tests/model_validation/statsmodels/RegressionModelSummary.py,sha256=3ljShlVkblJ2RNKSaTvsJ9QtdQGac-0ZcP1ne1bq94U,2354
 validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py,sha256=04vCwLMDUguS0z0Krbk9ubzrF5HVq0GGe9JqvKA7qQ4,3915
 validmind/tests/model_validation/statsmodels/ScorecardHistogram.py,sha256=0hnB6icasRKT_Cl0YxMEpIuaUKgi5scXHmV_nP9RmkI,4650
 validmind/tests/model_validation/statsmodels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -290,8 +285,8 @@ validmind/tests/prompt_validation/NegativeInstruction.py,sha256=qGh_hI21zBfffSjD
 validmind/tests/prompt_validation/Robustness.py,sha256=4FShTFGzHcfg9ePfd_lbVt28MOmvlQlL7lF2VZALO2U,5695
 validmind/tests/prompt_validation/Specificity.py,sha256=nBKoBvE4vFisXNZX085h0rJN_m1cS9c05J4Ku53tuwQ,4790
 validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-validmind/tests/prompt_validation/ai_powered_test.py,sha256=
-validmind/tests/run.py,sha256=
+validmind/tests/prompt_validation/ai_powered_test.py,sha256=pVmJPZVw1t0a3AMcGvBYEDR_sqI1Yu0AZjbshqBUVIg,2302
+validmind/tests/run.py,sha256=eIdR8If94ctaY25sWfygsLRi_0XoeJjqSZfdivocKU8,13703
 validmind/tests/test_providers.py,sha256=BceVuM_-bfQ4Zp-a5wwcP_wHeM6IOUpPIq1-MeT2-VY,6250
 validmind/tests/utils.py,sha256=mQuf1qgewPiE_pFN8iOoPSCGdyFqb4jbMFBVN3S3S2o,3526
 validmind/unit_metrics/__init__.py,sha256=lXeTJh8uq0TBRQHDBVhzKiHoV2eG9xOkHkI_pDXnkPU,952
@@ -311,7 +306,7 @@ validmind/unit_metrics/regression/MeanSquaredError.py,sha256=h-zgtlR3aigQwMGbi55
 validmind/unit_metrics/regression/QuantileLoss.py,sha256=rs0m9w4zIL6daQOHqYY-sEeQs6SDTpd0t3cN_KFZyqA,518
 validmind/unit_metrics/regression/RSquaredScore.py,sha256=z8-E-KSewvma9nu1OSUv97IfmFLpV5-rOq15jtlxklg,459
 validmind/unit_metrics/regression/RootMeanSquaredError.py,sha256=uIDsSpy75Z7W3zu4LditvW3mPJIkGxf-PdFQ7szWBZU,603
-validmind/utils.py,sha256=
+validmind/utils.py,sha256=3T0uv_6cH0T_48utYvz9xWFPl-5oa0E74RxG5FsreJc,19470
 validmind/vm_models/__init__.py,sha256=lcqf9q2aRzrVrNN6R--81IkrnSa6BXPbhJ8SnkT_hcI,702
 validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
 validmind/vm_models/dataset/dataset.py,sha256=F6_rc5pjccRLnB7UcIMiGMbD-qMVUW5v4TnZTNSXTbo,26370
@@ -321,14 +316,14 @@ validmind/vm_models/input.py,sha256=qLdqz_bktr4v0YcPha2vFdDvmkC-btT1pH9zBIkt1OY,
 validmind/vm_models/model.py,sha256=PRNyrnKihIRtbYt4idLPHf8OCij71Vgc5Xug_oVZfBg,6486
 validmind/vm_models/result/__init__.py,sha256=Bs5GMGDxiTsxlwCdqxz5LmGkY0_fM6-_0-3tWSRoqps,341
 validmind/vm_models/result/result.jinja,sha256=Yvovwm5gInCBukFRlvJXNlDIUpl2eFz4dz1lS3Sn_Gc,311
-validmind/vm_models/result/result.py,sha256=
+validmind/vm_models/result/result.py,sha256=McmQnFiSUL4zXxB1u2-saoCkTxPMUTQZEtk3ZpmpyEg,15266
 validmind/vm_models/result/utils.py,sha256=t6g-g1fJ3SU9lHqC1kMeozMkrUnfOMwYAep3Z5XFXNo,5122
 validmind/vm_models/test_suite/runner.py,sha256=Cpl9WKwHzJD5Zvrh71FzbEhGZkHM0x0MSd4PIwdOLDQ,5427
 validmind/vm_models/test_suite/summary.py,sha256=Ug3nMvpPL2DSTDujWagWMCrFiW9oDy0AqJL_zXN8pH0,4642
 validmind/vm_models/test_suite/test.py,sha256=uImjmPlBlLrlVPavsUzbaDK55bvpOn3PuFyWeyYyTac,3908
 validmind/vm_models/test_suite/test_suite.py,sha256=5Jppt2UXSMgvJ6FO5LIAKA4oN_-hh9SMr8APAFJzk9g,5080
-validmind-2.
-validmind-2.
-validmind-2.
-validmind-2.
-validmind-2.
+validmind-2.8.10.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
+validmind-2.8.10.dist-info/METADATA,sha256=MJDPX24c6xbCA6iNdyA2096lGY2rm6ip1KfBkWLd12A,6058
+validmind-2.8.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+validmind-2.8.10.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
+validmind-2.8.10.dist-info/RECORD,,
validmind/ai/test_result_description/config.yaml
REMOVED
@@ -1,29 +0,0 @@
-id: test_result_description
-name: Test Result Description
-description: Generate a description for a test result
-version: 0.1.0
-model: gpt-4o
-temperature: 0.0
-output_type: markdown
-prompts:
-  system:
-    role: system
-    path: system.jinja
-  user:
-    role: user
-    path: user.jinja
-inputs:
-  test_name:
-    description: The name of the test that produced the result (usually the last part of the test ID)
-    type: string
-  test_description:
-    description: The description (docstring) of the test that was run
-    type: string
-  summary:
-    description: The json result summary (i.e. the table(s) returned by the test)
-    type: list
-    optional: true
-  figures:
-    description: A list of base64 encoded images of the figures returned by the test
-    type: list
-    optional: true
validmind/ai/test_result_description/context.py
REMOVED
@@ -1,73 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-import multiprocessing
-
-MIN_IMAGES_FOR_PARALLEL = 4
-MAX_WORKERS = multiprocessing.cpu_count()
-
-
-def parallel_downsample_images(base64_strings):
-    import os
-    import sys
-
-    sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
-    from test_result_description.image_processing import (
-        downsample_image,  # type: ignore
-    )
-
-    num_images = len(base64_strings)
-
-    if num_images < MIN_IMAGES_FOR_PARALLEL:
-        return [downsample_image(img) for img in base64_strings]
-
-    num_workers = min(num_images, MAX_WORKERS)
-
-    with multiprocessing.Pool(processes=num_workers) as pool:
-        results = pool.map(downsample_image, base64_strings)
-
-    sys.path.pop(0)
-
-    return results
-
-
-class Context:
-    def __init__(self, mode="local"):
-        pass
-
-    def load(self, input_data):
-        # this task can accept a dict or a test result object from the ValidMind Library
-        if isinstance(input_data, dict):
-            return input_data
-
-        # we are likely running outside of the ValidMind Library and need to convert
-        # the test result object to a dictionary
-        test_result = input_data
-
-        try:
-            from markdownify import markdownify as md
-        except ImportError as e:
-            raise ImportError(
-                "Failed to import markdownify. Please install the package to use this task."
-            ) from e
-
-        input_data = {
-            "test_name": test_result.result_id.split(".")[-1],
-            "test_description": md(test_result.result_metadata[0]["text"]),
-        }
-
-        if hasattr(test_result, "metric") and test_result.metric.summary is not None:
-            input_data["summary"] = test_result.metric.summary.serialize()
-        elif (
-            hasattr(test_result, "test_results")
-            and test_result.test_results.summary is not None
-        ):
-            input_data["summary"] = test_result.test_results.summary.serialize()
-
-        if test_result.figures:
-            input_data["figures"] = parallel_downsample_images(
-                [figure._get_b64_url() for figure in test_result.figures]
-            )
-
-        return input_data
validmind/ai/test_result_description/image_processing.py
REMOVED
@@ -1,124 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-import base64
-import io
-
-import numpy as np
-from PIL import Image, ImageEnhance, ImageFilter
-
-DOWNSAMPLE_PERCENTAGE = 50
-
-
-def open_base64_image(base64_string):
-    if base64_string.startswith("data:image/png;base64,"):
-        base64_string = base64_string.split(",")[1]
-
-    image_data = base64.b64decode(base64_string)
-    image_buffer = io.BytesIO(image_data)
-    image = Image.open(image_buffer)
-
-    return image
-
-
-def downsample_image(base64_string):
-    image = open_base64_image(base64_string)
-
-    # Calculate the target dimensions based on the reduction percentage
-    target_width = int(image.width * (1 - DOWNSAMPLE_PERCENTAGE / 100))
-    target_height = int(image.height * (1 - DOWNSAMPLE_PERCENTAGE / 100))
-
-    # If the image is already smaller than the target size, return the original
-    if image.width <= target_width and image.height <= target_height:
-        return base64_string
-
-    # remove any margins from the image
-    # Find the bounding box of non-uniform pixels (margin detection)
-    width, height = image.size
-    background = image.getpixel((0, 0))  # Assume top-left pixel is background color
-
-    def is_different(pixel):
-        return pixel != background
-
-    left = next(
-        x
-        for x in range(width)
-        if any(is_different(image.getpixel((x, y))) for y in range(height))
-    )
-    right = next(
-        x
-        for x in range(width - 1, -1, -1)
-        if any(is_different(image.getpixel((x, y))) for y in range(height))
-    )
-    top = next(
-        y
-        for y in range(height)
-        if any(is_different(image.getpixel((x, y))) for x in range(width))
-    )
-    bottom = next(
-        y
-        for y in range(height - 1, -1, -1)
-        if any(is_different(image.getpixel((x, y))) for x in range(width))
-    )
-
-    # Crop the image to remove the uniform margin (with some padding)
-    bbox = (left - 5, top - 5, right + 6, bottom + 6)
-    image = image.crop(bbox)
-
-    # If the image has an alpha channel, remove any transparent margins
-    if image.mode in ("RGBA", "LA"):
-        alpha = image.getchannel("A")
-        bbox = alpha.getbbox()
-        if bbox:
-            image = image.crop(bbox)
-
-    # Apply unsharp mask to enhance edges
-    image = image.filter(ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3))
-
-    # Calculate new dimensions
-    aspect_ratio = image.width / image.height
-    new_height = target_height
-    new_width = int(new_height * aspect_ratio)
-
-    # print(f"downsampling from {width}x{height} to {new_width}x{new_height}")
-
-    # Ensure we don't exceed the target width
-    if new_width > target_width:
-        new_width = target_width
-        new_height = int(new_width / aspect_ratio)
-
-    # print(f"downsampling from {image.width}x{image.height} to {new_width}x{new_height}")
-
-    # Convert to numpy array for custom downsampling
-    img_array = np.array(image)
-
-    # Optimized area interpolation
-    h_factor = img_array.shape[0] / new_height
-    w_factor = img_array.shape[1] / new_width
-
-    h_indices = (np.arange(new_height).reshape(-1, 1) * h_factor).astype(int)
-    w_indices = (np.arange(new_width).reshape(1, -1) * w_factor).astype(int)
-
-    h_indices = np.minimum(h_indices, img_array.shape[0] - 1)
-    w_indices = np.minimum(w_indices, img_array.shape[1] - 1)
-
-    # Convert back to PIL Image
-    image = Image.fromarray(img_array[h_indices, w_indices].astype(np.uint8))
-
-    # Enhance contrast slightly
-    enhancer = ImageEnhance.Contrast(image)
-    image = enhancer.enhance(1.2)
-
-    # Sharpen the image
-    image = image.filter(ImageFilter.SHARPEN)
-
-    # Convert the image to bytes in PNG format
-    buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
-    img_bytes = buffered.getvalue()
-
-    # Encode the bytes to base64
-    b64_encoded = base64.b64encode(img_bytes).decode("utf-8")
-
-    return f"data:image/png;base64,{b64_encoded}"
validmind/ai/test_result_description/system.jinja
REMOVED
@@ -1,39 +0,0 @@
-You are an expert data scientist and MRM specialist.
-You are tasked with analyzing the results of a quantitative test run on some model or dataset.
-Your goal is to create a test description that will act as part of the model documentation.
-You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
-The overarching theme to maintain is MRM documentation.
-
-Examine the provided statistical test results and compose a description of the results.
-The results are either in the form of serialized tables or images of plots.
-Compose a description and interpretation of the result to accompany it in MRM documentation.
-It will be read by other data scientists and developers and by validators and stakeholders.
-
-Use valid Markdown syntax to format the response.
-Avoid long sentences and complex vocabulary.
-Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
-Structure the response clearly and logically.
-Respond only with your analysis and insights, not the verbatim test results.
-Respond only with the markdown content, no explanation or context for your response is necessary.
-Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
-
-Explain the test, its purpose, its mechanism/formula etc and why it is useful.
-If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
-Highlight the key insights from the test results. The key insights should be concise and easily understood.
-An insight should only be included if it is something not entirely obvious from the test results.
-End the response with any closing remarks, summary or additional useful information.
-
-Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
-
-<ResponseFormat>
-**<Test Name>** calculates the xyz <continue to explain what it does in detail>...
-
-This test is useful for <explain why and for what this test is useful>...
-
-**Key Insights:**
-
-The following key insights can be identified in the test results:
-
-- **<key insight 1 - title>**: <concise explanation of key insight 1>
-- ...<continue with any other key insights using the same format>
-</ResponseFormat>
validmind/ai/test_result_description/user.jinja
REMOVED
@@ -1,30 +0,0 @@
-**Test ID**: `{{ test_name }}`
-
-**Test Description**:
-
-{{ test_description }}
-
----
-
-Generate a description of the following result of the test using the instructions given in your system prompt.
-
-{%- if context %}
-**Context**:
-{{ context }}
-{%- endif %}
-
-{%- if summary %}
-**Test Result Tables** *(Raw Data)*:
-{{ summary }}
-{%- endif %}
-
-{%- if figures %}
-The following images make up the results of the test.
-{%- for b64_image_url in figures %}
-[[IMAGE:{{ b64_image_url }}]]
-{%- endfor %}
-{%- endif %}
-
-Keep your response concise and to the point!
-Only include content in your response if its something truly insightful or interesting!
-DO NOT VERBOSELY EXPLAIN THE TEST OR THE RESULTS!!!
{validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/LICENSE
File without changes
{validmind-2.7.9.dist-info → validmind-2.8.10.dist-info}/entry_points.txt
File without changes