validmind 2.5.15__py3-none-any.whl → 2.5.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +54 -112
- validmind/ai/test_result_description/config.yaml +29 -0
- validmind/ai/test_result_description/context.py +73 -0
- validmind/ai/test_result_description/image_processing.py +124 -0
- validmind/ai/test_result_description/system.jinja +39 -0
- validmind/ai/test_result_description/user.jinja +25 -0
- validmind/datasets/credit_risk/__init__.py +1 -0
- validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club_bias.py +142 -0
- validmind/errors.py +17 -0
- validmind/tests/__types__.py +19 -10
- validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +20 -24
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +4 -1
- validmind/tests/{model_validation/statsmodels → data_validation}/JarqueBera.py +22 -30
- validmind/tests/{model_validation/statsmodels → data_validation}/LJungBox.py +23 -27
- validmind/tests/data_validation/ProtectedClassesCombination.py +205 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
- validmind/tests/data_validation/ProtectedClassesDisparity.py +141 -0
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +180 -0
- validmind/tests/{model_validation/statsmodels → data_validation}/RunsTest.py +17 -20
- validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +20 -22
- validmind/tests/data_validation/nlp/Hashtags.py +15 -20
- validmind/tests/data_validation/nlp/TextDescription.py +3 -1
- validmind/tests/load.py +21 -5
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +12 -5
- validmind/tests/model_validation/ragas/AnswerRelevance.py +12 -6
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +12 -6
- validmind/tests/model_validation/ragas/AspectCritique.py +22 -17
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +12 -6
- validmind/tests/model_validation/ragas/ContextPrecision.py +12 -6
- validmind/tests/model_validation/ragas/ContextRecall.py +12 -6
- validmind/tests/model_validation/ragas/ContextUtilization.py +161 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +12 -6
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +158 -0
- validmind/tests/model_validation/sklearn/FeatureImportance.py +3 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +59 -0
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +40 -20
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -1
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
- validmind/utils.py +4 -0
- validmind/vm_models/test/metric.py +1 -0
- validmind/vm_models/test/result_wrapper.py +50 -26
- validmind/vm_models/test/threshold_test.py +1 -0
- {validmind-2.5.15.dist-info → validmind-2.5.19.dist-info}/METADATA +4 -3
- {validmind-2.5.15.dist-info → validmind-2.5.19.dist-info}/RECORD +52 -39
- {validmind-2.5.15.dist-info → validmind-2.5.19.dist-info}/WHEEL +1 -1
- {validmind-2.5.15.dist-info → validmind-2.5.19.dist-info}/LICENSE +0 -0
- {validmind-2.5.15.dist-info → validmind-2.5.19.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.5.
|
1
|
+
__version__ = "2.5.19"
|
@@ -3,15 +3,19 @@
|
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
5
|
import os
|
6
|
+
import re
|
6
7
|
from concurrent.futures import ThreadPoolExecutor
|
7
8
|
from typing import Union
|
8
9
|
|
10
|
+
from jinja2 import Template
|
11
|
+
|
9
12
|
from validmind.utils import md_to_html
|
10
13
|
|
11
14
|
from ..client_config import client_config
|
12
15
|
from ..logging import get_logger
|
13
16
|
|
14
17
|
__executor = ThreadPoolExecutor()
|
18
|
+
__prompt = None
|
15
19
|
|
16
20
|
logger = get_logger(__name__)
|
17
21
|
|
@@ -19,71 +23,48 @@ logger = get_logger(__name__)
|
|
19
23
|
AI_REVISION_NAME = "Generated by ValidMind AI"
|
20
24
|
DEFAULT_REVISION_NAME = "Default Description"
|
21
25
|
|
22
|
-
SYSTEM_PROMPT = """ # noqa
|
23
|
-
You are an expert data scientist and MRM specialist.
|
24
|
-
You are tasked with analyzing the results of a quantitative test run on some model or dataset.
|
25
|
-
Your goal is to create a test description that will act as part of the model documentation.
|
26
|
-
You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
|
27
|
-
The overarching theme to maintain is MRM documentation.
|
28
|
-
|
29
|
-
Examine the provided statistical test results and compose a description of the results.
|
30
|
-
The results are either in the form of serialized tables or images of plots.
|
31
|
-
Compose a description and interpretation of the result to accompany it in MRM documentation.
|
32
|
-
It will be read by other data scientists and developers and by validators and stakeholders.
|
33
|
-
|
34
|
-
Use valid Markdown syntax to format the response.
|
35
|
-
Avoid long sentences and complex vocabulary.
|
36
|
-
Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
|
37
|
-
Structure the response clearly and logically.
|
38
|
-
Respond only with your analysis and insights, not the verbatim test results.
|
39
|
-
Respond only with the markdown content, no explanation or context for your response is necessary.
|
40
|
-
Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
|
41
|
-
|
42
|
-
Explain the test, its purpose, its mechanism/formula etc and why it is useful.
|
43
|
-
If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
|
44
|
-
Highlight the key insights from the test results. The key insights should be concise and easily understood.
|
45
|
-
An insight should only be included if it is something not entirely obvious from the test results.
|
46
|
-
End the response with any closing remarks, summary or additional useful information.
|
47
26
|
|
48
|
-
|
27
|
+
def _load_prompt():
    """Return the cached ``(system, user)`` prompt templates.

    On first use the ``system.jinja`` and ``user.jinja`` files are read from
    the ``test_result_description`` folder next to this module, wrapped in
    ``jinja2.Template`` objects and stored in the module-level ``__prompt``
    cache. Later calls return the cached tuple without touching the disk.

    Returns:
        tuple: ``(system_template, user_template)``.
    """
    global __prompt

    if not __prompt:
        folder_path = os.path.join(os.path.dirname(__file__), "test_result_description")
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's default text encoding.
        with open(
            os.path.join(folder_path, "system.jinja"), "r", encoding="utf-8"
        ) as f:
            system_prompt = f.read()
        with open(
            os.path.join(folder_path, "user.jinja"), "r", encoding="utf-8"
        ) as f:
            user_prompt = f.read()

        __prompt = (Template(system_prompt), Template(user_prompt))

    return __prompt
|
56
40
|
|
57
|
-
The following key insights can be identified in the test results:
|
58
41
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
""".strip()
|
42
|
+
def prompt_to_message(role, prompt):
    """Convert a rendered prompt into a chat message dictionary.

    A prompt without any ``[[IMAGE:`` marker becomes a plain-text message.
    Otherwise the prompt is cut at every ``[[IMAGE:<url>]]`` marker and the
    message content becomes a list of ``text`` and ``image_url`` parts in the
    order they appear. Empty text segments are dropped.

    Args:
        role: Value stored under the message's ``"role"`` key.
        prompt: The rendered prompt string.

    Returns:
        dict: ``{"role": role, "content": <str or list of parts>}``.
    """
    if "[[IMAGE:" not in prompt:
        return {"role": role, "content": prompt}

    # Splitting on a pattern with one capture group alternates the pieces:
    # even positions hold the surrounding text, odd positions hold the
    # captured image URL.
    pieces = re.split(r"\[\[IMAGE:(.*?)\]\]", prompt, flags=re.DOTALL)

    parts = []
    for position, piece in enumerate(pieces):
        if position % 2:
            parts.append({"type": "image_url", "image_url": {"url": piece}})
        elif piece:
            parts.append({"type": "text", "text": piece})

    return {"role": role, "content": parts}
|
87
68
|
|
88
69
|
|
89
70
|
class DescriptionFuture:
|
@@ -117,7 +98,9 @@ def generate_description(
|
|
117
98
|
):
|
118
99
|
"""Generate the description for the test results"""
|
119
100
|
if not test_summary and not figures and not metric:
|
120
|
-
raise ValueError(
|
101
|
+
raise ValueError(
|
102
|
+
"No summary, unit metric or figures provided - cannot generate description"
|
103
|
+
)
|
121
104
|
|
122
105
|
# TODO: fix circular import
|
123
106
|
from validmind.ai.utils import get_client_and_model
|
@@ -140,68 +123,27 @@ def generate_description(
|
|
140
123
|
else:
|
141
124
|
test_summary = metric_summary
|
142
125
|
|
143
|
-
if test_summary
|
144
|
-
logger.debug(
|
145
|
-
f"Generating description for test {test_name} with stringified summary"
|
146
|
-
)
|
147
|
-
return (
|
148
|
-
client.chat.completions.create(
|
149
|
-
model=model,
|
150
|
-
temperature=0,
|
151
|
-
seed=42,
|
152
|
-
messages=[
|
153
|
-
{"role": "system", "content": SYSTEM_PROMPT},
|
154
|
-
{
|
155
|
-
"role": "user",
|
156
|
-
"content": USER_PROMPT.format(
|
157
|
-
test_name=test_name,
|
158
|
-
test_description=test_description,
|
159
|
-
test_summary=test_summary,
|
160
|
-
),
|
161
|
-
},
|
162
|
-
],
|
163
|
-
)
|
164
|
-
.choices[0]
|
165
|
-
.message.content.strip()
|
166
|
-
)
|
126
|
+
figures = [] if test_summary else figures
|
167
127
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
"text": USER_PROMPT_FIGURES.format(
|
184
|
-
test_name=test_name,
|
185
|
-
test_description=test_description,
|
186
|
-
),
|
187
|
-
},
|
188
|
-
*[
|
189
|
-
{
|
190
|
-
"type": "image_url",
|
191
|
-
"image_url": {
|
192
|
-
"url": figure._get_b64_url(),
|
193
|
-
},
|
194
|
-
}
|
195
|
-
for figure in figures
|
196
|
-
],
|
197
|
-
],
|
198
|
-
},
|
199
|
-
],
|
200
|
-
)
|
201
|
-
.choices[0]
|
202
|
-
.message.content.strip()
|
128
|
+
input_data = {
|
129
|
+
"test_name": test_name,
|
130
|
+
"test_description": test_description,
|
131
|
+
"summary": test_summary,
|
132
|
+
"figures": [figure._get_b64_url() for figure in figures],
|
133
|
+
}
|
134
|
+
system, user = _load_prompt()
|
135
|
+
|
136
|
+
response = client.chat.completions.create(
|
137
|
+
model=model,
|
138
|
+
temperature=0.0,
|
139
|
+
messages=[
|
140
|
+
prompt_to_message("system", system.render(input_data)),
|
141
|
+
prompt_to_message("user", user.render(input_data)),
|
142
|
+
],
|
203
143
|
)
|
204
144
|
|
145
|
+
return response.choices[0].message.content
|
146
|
+
|
205
147
|
|
206
148
|
def background_generate_description(
|
207
149
|
test_id: str,
|
@@ -0,0 +1,29 @@
|
|
1
|
+
id: test_result_description
|
2
|
+
name: Test Result Description
|
3
|
+
description: Generate a description for a test result
|
4
|
+
version: 0.1.0
|
5
|
+
model: gpt-4o
|
6
|
+
temperature: 0.0
|
7
|
+
output_type: markdown
|
8
|
+
prompts:
|
9
|
+
system:
|
10
|
+
role: system
|
11
|
+
path: system.jinja
|
12
|
+
user:
|
13
|
+
role: user
|
14
|
+
path: user.jinja
|
15
|
+
inputs:
|
16
|
+
test_name:
|
17
|
+
description: The name of the test that produced the result (usually the last part of the test ID)
|
18
|
+
type: string
|
19
|
+
test_description:
|
20
|
+
description: The description (docstring) of the test that was run
|
21
|
+
type: string
|
22
|
+
summary:
|
23
|
+
description: The json result summary (i.e. the table(s) returned by the test)
|
24
|
+
type: list
|
25
|
+
optional: true
|
26
|
+
figures:
|
27
|
+
description: A list of base64 encoded images of the figures returned by the test
|
28
|
+
type: list
|
29
|
+
optional: true
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import multiprocessing
|
6
|
+
|
7
|
+
MIN_IMAGES_FOR_PARALLEL = 4
|
8
|
+
MAX_WORKERS = multiprocessing.cpu_count()
|
9
|
+
|
10
|
+
|
11
|
+
def parallel_downsample_images(base64_strings):
    """Downsample a list of base64-encoded images, using a process pool for larger batches.

    ``downsample_image`` is imported through a temporary ``sys.path`` entry
    pointing at the parent of this file's folder. The entry is removed again
    before returning, on every exit path including errors.

    Batches smaller than ``MIN_IMAGES_FOR_PARALLEL`` are processed
    sequentially in the current process. Larger batches are mapped over a
    ``multiprocessing.Pool`` with at most ``MAX_WORKERS`` processes.

    Args:
        base64_strings: Sequence of base64 image strings / data URLs.

    Returns:
        list: The downsampled images, in the same order as the input.
    """
    import os
    import sys

    package_dir = os.path.dirname(os.path.dirname(__file__))
    sys.path.insert(0, package_dir)
    try:
        from test_result_description.image_processing import (
            downsample_image,  # type: ignore
        )

        num_images = len(base64_strings)

        if num_images < MIN_IMAGES_FOR_PARALLEL:
            return [downsample_image(img) for img in base64_strings]

        num_workers = min(num_images, MAX_WORKERS)

        with multiprocessing.Pool(processes=num_workers) as pool:
            return pool.map(downsample_image, base64_strings)
    finally:
        # Remove the entry by value rather than by position: something else
        # may have inserted at index 0 in the meantime.
        if package_dir in sys.path:
            sys.path.remove(package_dir)
|
33
|
+
|
34
|
+
|
35
|
+
class Context:
    """Builds the input dictionary used to render the test result description prompts."""

    def __init__(self, mode="local"):
        # `mode` is accepted but not used by this implementation.
        pass

    def load(self, input_data):
        """Return the prompt inputs for ``input_data``.

        Args:
            input_data: Either a ready-made dict, which is returned unchanged,
                or a test result object exposing ``result_id``,
                ``result_metadata``, ``figures`` and optionally ``metric`` or
                ``test_results``.

        Returns:
            dict: Always contains ``test_name`` and ``test_description``.
            ``summary`` and ``figures`` are added when the result provides
            them.

        Raises:
            ImportError: If a test result object is passed and
                ``markdownify`` is not installed.
        """
        # this task can accept a dict or a test result object from the dev framework
        if isinstance(input_data, dict):
            return input_data

        # we are likely running outside of the dev framework and need to convert
        # the test result object to a dictionary
        test_result = input_data

        try:
            from markdownify import markdownify as md
        except ImportError as e:
            raise ImportError(
                "Failed to import markdownify. Please install the package to use this task."
            ) from e

        input_data = {
            "test_name": test_result.result_id.split(".")[-1],
            "test_description": md(test_result.result_metadata[0]["text"]),
        }

        # The attribute may exist but be None (e.g. a figures-only result), so
        # check the value itself rather than only the attribute's presence.
        metric = getattr(test_result, "metric", None)
        test_results = getattr(test_result, "test_results", None)

        if metric is not None and metric.summary is not None:
            input_data["summary"] = metric.summary.serialize()
        elif test_results is not None and test_results.summary is not None:
            input_data["summary"] = test_results.summary.serialize()

        if test_result.figures:
            input_data["figures"] = parallel_downsample_images(
                [figure._get_b64_url() for figure in test_result.figures]
            )

        return input_data
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import base64
|
6
|
+
import io
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
from PIL import Image, ImageEnhance, ImageFilter
|
10
|
+
|
11
|
+
DOWNSAMPLE_PERCENTAGE = 50
|
12
|
+
|
13
|
+
|
14
|
+
def open_base64_image(base64_string):
    """Decode a base64 string or ``data:`` URL into a PIL image.

    Args:
        base64_string: Raw base64 image data, or a data URL such as
            ``data:image/png;base64,<payload>``. Any ``data:`` header up to
            the first comma is stripped, not only the PNG one.

    Returns:
        PIL.Image.Image: The image opened from the decoded bytes.
    """
    if base64_string.startswith("data:"):
        # Everything after the first comma of a data URL is the payload.
        _, separator, payload = base64_string.partition(",")
        if separator:
            base64_string = payload

    image_data = base64.b64decode(base64_string)
    image_buffer = io.BytesIO(image_data)
    image = Image.open(image_buffer)

    return image
|
23
|
+
|
24
|
+
|
25
|
+
def downsample_image(base64_string):
    """Crop, shrink and re-encode a base64 image as a PNG data URL.

    Steps, in order:

    1. Compute target dimensions by reducing the original width and height by
       ``DOWNSAMPLE_PERCENTAGE`` percent.
    2. Crop away the uniform margin, using the top-left pixel as the
       background colour, and keep a few pixels of padding.
    3. For ``RGBA``/``LA`` images, crop to the bounding box of the alpha
       channel.
    4. Apply an unsharp mask, resample to the target size, slightly raise the
       contrast and sharpen.

    Args:
        base64_string: Raw base64 image data or a data URL.

    Returns:
        str: A ``data:image/png;base64,...`` URL of the processed image. The
        input string is returned unchanged when there is nothing sensible to
        downsample to.
    """
    image = open_base64_image(base64_string)

    # Calculate the target dimensions based on the reduction percentage
    target_width = int(image.width * (1 - DOWNSAMPLE_PERCENTAGE / 100))
    target_height = int(image.height * (1 - DOWNSAMPLE_PERCENTAGE / 100))

    # A tiny image (or a 100% reduction) would give a zero-sized target and
    # division by zero further down, so return the original untouched.
    if target_width < 1 or target_height < 1:
        return base64_string

    # If the image is already smaller than the target size, return the original
    if image.width <= target_width and image.height <= target_height:
        return base64_string

    # Find the bounding box of pixels that differ from the background, which
    # is assumed to be the colour of the top-left pixel.
    width, height = image.size
    pixels = np.asarray(image)
    differs = pixels != pixels[0, 0]
    if differs.ndim == 3:
        # multi-band image: a pixel differs if any of its bands differs
        differs = differs.any(axis=2)
    content_cols = np.flatnonzero(differs.any(axis=0))
    content_rows = np.flatnonzero(differs.any(axis=1))

    # A completely uniform image has no content box; leave it uncropped
    # rather than failing.
    if content_cols.size and content_rows.size:
        # Keep some padding, clamped to the image so the crop never reaches
        # outside the original pixels.
        bbox = (
            max(int(content_cols[0]) - 5, 0),
            max(int(content_rows[0]) - 5, 0),
            min(int(content_cols[-1]) + 6, width),
            min(int(content_rows[-1]) + 6, height),
        )
        image = image.crop(bbox)

    # If the image has an alpha channel, remove any transparent margins
    if image.mode in ("RGBA", "LA"):
        alpha = image.getchannel("A")
        bbox = alpha.getbbox()
        if bbox:
            image = image.crop(bbox)

    # Apply unsharp mask to enhance edges
    image = image.filter(ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3))

    # Fit the cropped image into the target box while keeping its aspect ratio
    aspect_ratio = image.width / image.height
    new_height = target_height
    new_width = max(int(new_height * aspect_ratio), 1)

    # Ensure we don't exceed the target width
    if new_width > target_width:
        new_width = target_width
        new_height = max(int(new_width / aspect_ratio), 1)

    # Convert to numpy array for custom downsampling
    img_array = np.array(image)

    # Nearest-neighbour sampling: each output pixel takes the single source
    # pixel at the scaled row/column index.
    h_factor = img_array.shape[0] / new_height
    w_factor = img_array.shape[1] / new_width

    h_indices = (np.arange(new_height).reshape(-1, 1) * h_factor).astype(int)
    w_indices = (np.arange(new_width).reshape(1, -1) * w_factor).astype(int)

    h_indices = np.minimum(h_indices, img_array.shape[0] - 1)
    w_indices = np.minimum(w_indices, img_array.shape[1] - 1)

    # Convert back to PIL Image
    image = Image.fromarray(img_array[h_indices, w_indices].astype(np.uint8))

    # Enhance contrast slightly
    enhancer = ImageEnhance.Contrast(image)
    image = enhancer.enhance(1.2)

    # Sharpen the image
    image = image.filter(ImageFilter.SHARPEN)

    # Convert the image to bytes in PNG format
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_bytes = buffered.getvalue()

    # Encode the bytes to base64
    b64_encoded = base64.b64encode(img_bytes).decode("utf-8")

    return f"data:image/png;base64,{b64_encoded}"
|
@@ -0,0 +1,39 @@
|
|
1
|
+
You are an expert data scientist and MRM specialist.
|
2
|
+
You are tasked with analyzing the results of a quantitative test run on some model or dataset.
|
3
|
+
Your goal is to create a test description that will act as part of the model documentation.
|
4
|
+
You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
|
5
|
+
The overarching theme to maintain is MRM documentation.
|
6
|
+
|
7
|
+
Examine the provided statistical test results and compose a description of the results.
|
8
|
+
The results are either in the form of serialized tables or images of plots.
|
9
|
+
Compose a description and interpretation of the result to accompany it in MRM documentation.
|
10
|
+
It will be read by other data scientists and developers and by validators and stakeholders.
|
11
|
+
|
12
|
+
Use valid Markdown syntax to format the response.
|
13
|
+
Avoid long sentences and complex vocabulary.
|
14
|
+
Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
|
15
|
+
Structure the response clearly and logically.
|
16
|
+
Respond only with your analysis and insights, not the verbatim test results.
|
17
|
+
Respond only with the markdown content, no explanation or context for your response is necessary.
|
18
|
+
Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
|
19
|
+
|
20
|
+
Explain the test, its purpose, its mechanism/formula etc and why it is useful.
|
21
|
+
If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
|
22
|
+
Highlight the key insights from the test results. The key insights should be concise and easily understood.
|
23
|
+
An insight should only be included if it is something not entirely obvious from the test results.
|
24
|
+
End the response with any closing remarks, summary or additional useful information.
|
25
|
+
|
26
|
+
Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
|
27
|
+
|
28
|
+
<ResponseFormat>
|
29
|
+
**<Test Name>** calculates the xyz <continue to explain what it does in detail>...
|
30
|
+
|
31
|
+
This test is useful for <explain why and for what this test is useful>...
|
32
|
+
|
33
|
+
**Key Insights:**
|
34
|
+
|
35
|
+
The following key insights can be identified in the test results:
|
36
|
+
|
37
|
+
- **<key insight 1 - title>**: <concise explanation of key insight 1>
|
38
|
+
- ...<continue with any other key insights using the same format>
|
39
|
+
</ResponseFormat>
|
@@ -0,0 +1,25 @@
|
|
1
|
+
**Test ID**: `{{ test_name }}`
|
2
|
+
|
3
|
+
**Test Description**:
|
4
|
+
|
5
|
+
{{ test_description }}
|
6
|
+
|
7
|
+
---
|
8
|
+
|
9
|
+
Generate a description of the following result of the test using the instructions given in your system prompt.
|
10
|
+
|
11
|
+
{%- if summary %}
|
12
|
+
**Test Result Tables** *(Raw Data)*:
|
13
|
+
{{ summary }}
|
14
|
+
{%- endif %}
|
15
|
+
|
16
|
+
{%- if figures %}
|
17
|
+
The following images make up the results of the test.
|
18
|
+
{%- for b64_image_url in figures %}
|
19
|
+
[[IMAGE:{{ b64_image_url }}]]
|
20
|
+
{%- endfor %}
|
21
|
+
{%- endif %}
|
22
|
+
|
23
|
+
Keep your response concise and to the point!
|
24
|
+
Only include content in your response if it's something truly insightful or interesting!
|
25
|
+
DO NOT VERBOSELY EXPLAIN THE TEST OR THE RESULTS!!!
|
Binary file
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import os
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
from sklearn.model_selection import train_test_split
|
10
|
+
from sklearn.preprocessing import LabelEncoder
|
11
|
+
|
12
|
+
current_path = os.path.dirname(os.path.abspath(__file__))
|
13
|
+
dataset_path = os.path.join(current_path, "datasets")
|
14
|
+
|
15
|
+
# URLs or file paths for online and offline data
|
16
|
+
data_file = os.path.join(dataset_path, "lending_club_biased.csv.gz")
|
17
|
+
|
18
|
+
target_column = "loan_status"
|
19
|
+
protected_classes = ["Gender", "Race", "Marital_Status"]
|
20
|
+
|
21
|
+
drop_columns = ["total_pymnt", "id", "verification_status", "purpose"]
|
22
|
+
|
23
|
+
score_params = {
|
24
|
+
"target_score": 600,
|
25
|
+
"target_odds": 50,
|
26
|
+
"pdo": 20,
|
27
|
+
}
|
28
|
+
|
29
|
+
|
30
|
+
def load_data():
    """
    Load the bundled gzip-compressed CSV and run the basic cleaning steps.

    The file at ``data_file`` is read as-is; the path is not rewritten, so a
    directory name that happens to contain ``.zip`` cannot be altered.

    :return: DataFrame containing the loaded and cleaned data.
    """

    print(f"Loading data from: {data_file}")
    # Read the CSV file directly from the .gz archive
    df = pd.read_csv(data_file, compression="gzip")
    print("Data loaded successfully.")
    df = _clean_data(df)

    return df
|
46
|
+
|
47
|
+
|
48
|
+
def _clean_data(df):
    """
    Apply the basic cleaning steps to the raw dataset and report each one.

    Works on a copy of the input. In order: drops the columns listed in
    ``drop_columns``, drops rows whose target value is missing, drops columns
    where 70% or more of the values are missing, and drops columns that do
    not have more than one unique value. The frame's shape and missing-value
    count are printed after every step.

    :param df: Raw DataFrame as loaded from disk.
    :return: Cleaned DataFrame.
    """

    def _report(df):
        # Summary line printed after every cleaning step.
        print(
            f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
        )

    cleaned = df.copy()
    print("Loading the raw dataset:")
    _report(cleaned)

    # Drop columns not relevant for this model
    print(f"Dropping columns not relevant for this model: {drop_columns}")
    cleaned = cleaned.drop(columns=drop_columns)
    _report(cleaned)

    # Drop rows with missing target values
    cleaned = cleaned.dropna(subset=[target_column])
    print("Dropping rows with missing target values:")
    _report(cleaned)

    # Keep only columns whose share of missing values is below 70%
    cleaned = cleaned.loc[:, cleaned.isnull().mean() < 0.7]
    print("Dropping columns with more than 70% missing values:")
    _report(cleaned)

    # Keep only columns that have more than one unique value
    cleaned = cleaned.loc[:, cleaned.nunique() > 1]
    print("Dropping columns with only one unique value:")
    _report(cleaned)

    return cleaned
|
86
|
+
|
87
|
+
|
88
|
+
def preprocess(df):
    """
    Prepare the cleaned dataset for modeling.

    Works on a copy of the input. The target column is cast to ``int``, and
    every ``object``-dtype column is replaced by a ``<column>_encoded`` column
    holding its ``LabelEncoder`` codes. The fitted encoders are not kept, so
    the codes cannot be mapped back to labels from the returned frame alone.

    :param df: Cleaned DataFrame.
    :return: DataFrame with an integer target and label-encoded categoricals.
    """
    df = df.copy()

    # Convert the target variable to integer type for modeling.
    df[target_column] = df[target_column].astype(int)

    # Identify and encode categorical variables for modeling purposes
    categorical_columns = df.select_dtypes(include=["object"]).columns

    for column in categorical_columns:
        df[f"{column}_encoded"] = LabelEncoder().fit_transform(df[column])

    # Remove the original columns in one pass once all are encoded
    df = df.drop(columns=categorical_columns)

    print(f"Encoding categorical variables: {list(categorical_columns)}")
    print(
        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
    )

    return df
|
110
|
+
|
111
|
+
|
112
|
+
def split(df, test_size=0.3):
    """
    Split the dataset into training and test sets and print a summary of each.

    The split is performed on a copy of the input with a fixed
    ``random_state`` of 42.

    :param df: DataFrame to split.
    :param test_size: Passed through to ``train_test_split``.
    :return: Tuple ``(train_df, test_df)``.
    """
    train_df, test_df = train_test_split(
        df.copy(),
        test_size=test_size,
        random_state=42,
    )

    # Summary of the training portion
    train_summary = f"Training Dataset:\nRows: {train_df.shape[0]}\nColumns: {train_df.shape[1]}\nMissing values: {train_df.isnull().sum().sum()}\n"
    print(train_summary)

    # Summary of the test portion
    test_summary = f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\nMissing values: {test_df.isnull().sum().sum()}\n"
    print(test_summary)

    return train_df, test_df
|
129
|
+
|
130
|
+
|
131
|
+
def compute_scores(probabilities):
    """
    Convert probabilities into scores using the ``score_params`` settings.

    The score is ``offset + factor * ln(p / (1 - p))`` where
    ``factor = pdo / ln(2)`` and
    ``offset = target_score - factor * ln(target_odds)``.

    :param probabilities: Probabilities supporting element-wise arithmetic
        (e.g. a NumPy array).
    :return: Scores with the same shape as ``probabilities``.
    """
    factor = score_params["pdo"] / np.log(2)
    offset = score_params["target_score"] - (
        factor * np.log(score_params["target_odds"])
    )

    odds = probabilities / (1 - probabilities)

    return offset + factor * np.log(odds)
|