validmind 2.7.8__py3-none-any.whl → 2.8.10__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (32)
  1. validmind/__init__.py +58 -10
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +17 -73
  4. validmind/api_client.py +18 -1
  5. validmind/errors.py +1 -1
  6. validmind/models/r_model.py +5 -1
  7. validmind/tests/comparison.py +28 -2
  8. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +4 -9
  9. validmind/tests/model_validation/ContextualRecall.py +1 -1
  10. validmind/tests/model_validation/MeteorScore.py +1 -1
  11. validmind/tests/model_validation/ToxicityScore.py +1 -1
  12. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +1 -1
  13. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +1 -1
  14. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +1 -1
  15. validmind/tests/model_validation/embeddings/utils.py +6 -9
  16. validmind/tests/model_validation/ragas/utils.py +8 -7
  17. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +3 -7
  18. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
  19. validmind/tests/prompt_validation/ai_powered_test.py +5 -4
  20. validmind/tests/run.py +5 -1
  21. validmind/utils.py +35 -5
  22. validmind/vm_models/result/result.py +43 -2
  23. {validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/METADATA +5 -4
  24. {validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/RECORD +27 -32
  25. {validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/WHEEL +1 -1
  26. validmind/ai/test_result_description/config.yaml +0 -29
  27. validmind/ai/test_result_description/context.py +0 -73
  28. validmind/ai/test_result_description/image_processing.py +0 -124
  29. validmind/ai/test_result_description/system.jinja +0 -39
  30. validmind/ai/test_result_description/user.jinja +0 -30
  31. {validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/LICENSE +0 -0
  32. {validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/entry_points.txt +0 -0
validmind/__init__.py CHANGED
@@ -30,8 +30,12 @@ vm.init(
30
30
 
31
31
  After you have pasted the code snippet into your development source code and executed the code, the Python Library API will register with ValidMind. You can now use the ValidMind Library to document and test your models, and to upload to the ValidMind Platform.
32
32
  """
33
+ import threading
33
34
  import warnings
34
35
 
36
+ import pkg_resources
37
+ from IPython.display import HTML, display
38
+
35
39
  # Ignore Numba warnings. We are not requiring this package directly
36
40
  from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
37
41
 
@@ -51,30 +55,74 @@ from .client import ( # noqa: E402
51
55
  )
52
56
  from .tests.decorator import tags, tasks, test
53
57
  from .tests.run import print_env
58
+ from .utils import is_notebook, parse_version
54
59
  from .vm_models.result import RawData
55
60
 
61
+ __shown = False
62
+
63
+
64
+ def show_warning(installed, running):
65
+ global __shown
66
+
67
+ if __shown:
68
+ return
69
+ __shown = True
70
+
71
+ message = (
72
+ f"⚠️ This kernel is running an older version of validmind ({running}) "
73
+ f"than the latest version installed on your system ({installed}).\n\n"
74
+ "You may need to restart the kernel if you are experiencing issues."
75
+ )
76
+ display(HTML(f"<div style='color: red;'>{message}</div>"))
77
+
78
+
79
+ def check_version():
80
+ # get the installed vs running version of validmind
81
+ # to make sure we are using the latest installed version
82
+ # in case user has updated the package but forgot to restart the kernel
83
+ installed = pkg_resources.get_distribution("validmind").version
84
+ running = __version__
85
+
86
+ if parse_version(installed) > parse_version(running):
87
+ show_warning(installed, running)
88
+
89
+ # Schedule the next check for 5 minutes from now
90
+ timer = threading.Timer(300, check_version)
91
+ timer.daemon = True
92
+ timer.start()
93
+
94
+
95
+ if is_notebook():
96
+ check_version()
97
+
56
98
  __all__ = [ # noqa
57
99
  "__version__",
58
- # Python Library API
59
- "datasets",
60
- "errors",
61
- "get_test_suite",
100
+ # main library API
62
101
  "init",
102
+ "reload",
63
103
  "init_dataset",
64
104
  "init_model",
65
105
  "init_r_model",
66
106
  "preview_template",
67
- "print_env",
68
- "RawData",
69
- "reload",
70
107
  "run_documentation_tests",
108
+ # log metric function (for direct/bulk/retroactive logging of metrics)
109
+ "log_metric",
110
+ # test suite functions (less common)
111
+ "get_test_suite",
71
112
  "run_test_suite",
113
+ # helper functions (for troubleshooting)
114
+ "print_env",
115
+ # decorators (for building tests
72
116
  "tags",
73
117
  "tasks",
74
118
  "test",
75
- "tests",
76
- "test_suites",
119
+ # raw data (for post-processing test results and building tests)
120
+ "RawData",
121
+ # submodules
122
+ "datasets",
123
+ "errors",
77
124
  "vm_models",
125
+ "tests",
78
126
  "unit_metrics",
79
- "log_metric",
127
+ "test_suites",
80
128
  ]
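
Editor's note: in notebooks, the new import-time `check_version` compares the installed distribution's version against the running module's `__version__` every five minutes and shows a red HTML warning once if they differ. A minimal sketch of the comparison it relies on, using the `parse_version` helper added in `validmind/utils.py` later in this diff:

```python
from validmind.utils import parse_version

# Versions are compared as (major, minor, patch) integer tuples.
assert parse_version("2.8.10") == (2, 8, 10)

# A stale kernel still running 2.7.8 next to an installed 2.8.10 satisfies
# the condition that triggers show_warning().
assert parse_version("2.8.10") > parse_version("2.7.8")
```
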
validmind/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.7.8"
1
+ __version__ = "2.8.10"
validmind/ai/test_descriptions.py CHANGED
@@ -4,70 +4,24 @@
4
4
 
5
5
  import json
6
6
  import os
7
- import re
8
7
  from concurrent.futures import ThreadPoolExecutor
9
8
  from typing import List, Optional, Union
10
9
 
11
10
  import tiktoken
12
- from jinja2 import Template
13
11
 
14
12
  from ..client_config import client_config
15
13
  from ..logging import get_logger
16
14
  from ..utils import NumpyEncoder, md_to_html, test_id_to_name
17
15
  from ..vm_models.figure import Figure
18
16
  from ..vm_models.result import ResultTable
19
- from .utils import DescriptionFuture, get_client_and_model
17
+ from .utils import DescriptionFuture
20
18
 
21
19
  __executor = ThreadPoolExecutor()
22
- __prompt = None
23
20
 
24
21
  logger = get_logger(__name__)
25
22
 
26
23
 
27
- def _load_prompt():
28
- global __prompt
29
-
30
- if not __prompt:
31
- folder_path = os.path.join(os.path.dirname(__file__), "test_result_description")
32
- with open(os.path.join(folder_path, "system.jinja"), "r") as f:
33
- system_prompt = f.read()
34
- with open(os.path.join(folder_path, "user.jinja"), "r") as f:
35
- user_prompt = f.read()
36
-
37
- __prompt = (Template(system_prompt), Template(user_prompt))
38
-
39
- return __prompt
40
-
41
-
42
- def prompt_to_message(role, prompt):
43
- if "[[IMAGE:" not in prompt:
44
- return {"role": role, "content": prompt}
45
-
46
- content = []
47
-
48
- # Regex pattern to find [[IMAGE:<b64-data>]] markers
49
- pattern = re.compile(r"\[\[IMAGE:(.*?)\]\]", re.DOTALL)
50
-
51
- last_index = 0
52
- for match in pattern.finditer(prompt):
53
- # Text before the image marker
54
- start, end = match.span()
55
- if start > last_index:
56
- content.append({"type": "text", "text": prompt[last_index:start]})
57
-
58
- content.append({"type": "image_url", "image_url": {"url": match.group(1)}})
59
-
60
- last_index = end
61
-
62
- # Text after the last image
63
- if last_index < len(prompt):
64
- content.append({"type": "text", "text": prompt[last_index:]})
65
-
66
- return {"role": role, "content": content}
67
-
68
-
69
24
  def _get_llm_global_context():
70
-
71
25
  # Get the context from the environment variable
72
26
  context = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_CONTEXT", "")
73
27
 
@@ -115,13 +69,13 @@ def generate_description(
115
69
  title: Optional[str] = None,
116
70
  ):
117
71
  """Generate the description for the test results"""
72
+ from validmind.api_client import generate_test_result_description
73
+
118
74
  if not tables and not figures and not metric:
119
75
  raise ValueError(
120
76
  "No tables, unit metric or figures provided - cannot generate description"
121
77
  )
122
78
 
123
- client, model = get_client_and_model()
124
-
125
79
  # get last part of test id
126
80
  test_name = title or test_id.split(".")[-1]
127
81
 
@@ -145,29 +99,18 @@ def generate_description(
145
99
  else:
146
100
  summary = None
147
101
 
148
- context = _get_llm_global_context()
149
-
150
- input_data = {
151
- "test_name": test_name,
152
- "test_description": test_description,
153
- "title": title,
154
- "summary": _truncate_summary(summary, test_id),
155
- "figures": [figure._get_b64_url() for figure in ([] if tables else figures)],
156
- "context": context,
157
- }
158
- system, user = _load_prompt()
159
-
160
- messages = [
161
- prompt_to_message("system", system.render(input_data)),
162
- prompt_to_message("user", user.render(input_data)),
163
- ]
164
- response = client.chat.completions.create(
165
- model=model,
166
- temperature=0.0,
167
- messages=messages,
168
- )
169
-
170
- return response.choices[0].message.content
102
+ return generate_test_result_description(
103
+ {
104
+ "test_name": test_name,
105
+ "test_description": test_description,
106
+ "title": title,
107
+ "summary": _truncate_summary(summary, test_id),
108
+ "figures": [
109
+ figure._get_b64_url() for figure in ([] if tables else figures)
110
+ ],
111
+ "context": _get_llm_global_context(),
112
+ }
113
+ )["content"]
171
114
 
172
115
 
173
116
  def background_generate_description(
@@ -238,7 +181,8 @@ def get_result_description(
238
181
  # Check the feature flag first, then the environment variable
239
182
  llm_descriptions_enabled = (
240
183
  client_config.can_generate_llm_test_descriptions()
241
- and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") not in ["0", "false"]
184
+ and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1").lower()
185
+ not in ["0", "false"]
242
186
  )
243
187
 
244
188
  # TODO: fix circular import
validmind/api_client.py CHANGED
@@ -194,6 +194,7 @@ def init(
194
194
  api_host: Optional[str] = None,
195
195
  model: Optional[str] = None,
196
196
  monitoring: bool = False,
197
+ generate_descriptions: Optional[bool] = None,
197
198
  ):
198
199
  """
199
200
  Initializes the API client instances and calls the /ping endpoint to ensure
@@ -209,7 +210,7 @@ def init(
209
210
  api_secret (str, optional): The API secret. Defaults to None.
210
211
  api_host (str, optional): The API host. Defaults to None.
211
212
  monitoring (bool): The ongoing monitoring flag. Defaults to False.
212
-
213
+ generate_descriptions (bool): Whether to use GenAI to generate test result descriptions. Defaults to True.
213
214
  Raises:
214
215
  ValueError: If the API key and secret are not provided
215
216
  """
@@ -235,6 +236,9 @@ def init(
235
236
 
236
237
  _monitoring = monitoring
237
238
 
239
+ if generate_descriptions is not None:
240
+ os.environ["VALIDMIND_LLM_DESCRIPTIONS_ENABLED"] = str(generate_descriptions)
241
+
238
242
  reload()
239
243
 
240
244
 
@@ -487,3 +491,16 @@ def get_ai_key() -> Dict[str, Any]:
487
491
  raise_api_error(r.text)
488
492
 
489
493
  return r.json()
494
+
495
+
496
+ def generate_test_result_description(test_result_data: Dict[str, Any]) -> str:
497
+ r = requests.post(
498
+ url=_get_url("ai/generate/test_result_description"),
499
+ headers=_get_api_headers(),
500
+ json=test_result_data,
501
+ )
502
+
503
+ if r.status_code != 200:
504
+ raise_api_error(r.text)
505
+
506
+ return r.json()
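
Editor's note: the new `generate_descriptions` argument gives `init` a direct switch for LLM-generated test result descriptions; per the diff it simply writes the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable before reloading. A hedged usage sketch (credentials and model identifier are placeholders, not real values):

```python
import validmind as vm

vm.init(
    api_key="<your-api-key>",          # placeholder
    api_secret="<your-api-secret>",    # placeholder
    model="<your-model-identifier>",   # placeholder
    generate_descriptions=False,       # sets VALIDMIND_LLM_DESCRIPTIONS_ENABLED="False"
)
```
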
validmind/errors.py CHANGED
@@ -228,7 +228,7 @@ class MissingRExtrasError(BaseError):
228
228
  def description(self, *args, **kwargs):
229
229
  return (
230
230
  self.message
231
- or "ValidMind r-support needs to be installed: `pip install validmind[r-support]`"
231
+ or "`rpy2` is required to use R models. Please install it with `pip install rpy2`"
232
232
  )
233
233
 
234
234
 
validmind/models/r_model.py CHANGED
@@ -5,6 +5,7 @@
5
5
  import numpy as np
6
6
  import pandas as pd
7
7
 
8
+ from validmind.errors import MissingRExtrasError
8
9
  from validmind.logging import get_logger
9
10
  from validmind.vm_models.model import VMModel
10
11
 
@@ -125,7 +126,10 @@ class RModel(VMModel):
125
126
  """
126
127
  Converts the predicted probabilities to classes
127
128
  """
128
- from rpy2.robjects import pandas2ri
129
+ try:
130
+ from rpy2.robjects import pandas2ri
131
+ except ImportError:
132
+ raise MissingRExtrasError()
129
133
 
130
134
  # Activate the pandas conversion for rpy2
131
135
  pandas2ri.activate()
validmind/tests/comparison.py CHANGED
@@ -15,7 +15,7 @@ from validmind.vm_models.figure import (
15
15
  is_png_image,
16
16
  )
17
17
  from validmind.vm_models.input import VMInput
18
- from validmind.vm_models.result import ResultTable, TestResult
18
+ from validmind.vm_models.result import RawData, ResultTable, TestResult
19
19
 
20
20
  logger = get_logger(__name__)
21
21
 
@@ -312,6 +312,25 @@ def get_comparison_test_configs(
312
312
  return test_configs
313
313
 
314
314
 
315
+ def _combine_raw_data(results: List[TestResult]) -> RawData:
316
+ """Combine RawData objects"""
317
+ attribute_names = results[0].raw_data.__dict__.keys()
318
+
319
+ # check that all the raw data objects have the same attributes
320
+ for result in results:
321
+ if not isinstance(result.raw_data, RawData):
322
+ raise ValueError("All raw data objects must be of type RawData")
323
+ if result.raw_data.__dict__.keys() != attribute_names:
324
+ raise ValueError("RawData objects must have the same attributes")
325
+
326
+ return RawData(
327
+ **{
328
+ key: [getattr(result.raw_data, key) for result in results]
329
+ for key in attribute_names
330
+ }
331
+ )
332
+
333
+
315
334
  def combine_results(
316
335
  results: List[TestResult],
317
336
  ) -> Tuple[List[Any], Dict[str, List[Any]], Dict[str, List[Any]]]:
@@ -338,6 +357,9 @@ def combine_results(
338
357
  # handle threshold tests (i.e. tests that have pass/fail bool status)
339
358
  if results[0].passed is not None:
340
359
  combined_outputs.append(all(result.passed for result in results))
360
+ # handle raw data (if any)
361
+ if results[0].raw_data:
362
+ combined_outputs.append(_combine_raw_data(results))
341
363
 
342
364
  # combine inputs and params
343
365
  combined_inputs = {}
@@ -359,4 +381,8 @@ def combine_results(
359
381
  combined_inputs = _combine_dict_values(combined_inputs)
360
382
  combined_params = _combine_dict_values(combined_params)
361
383
 
362
- return combined_outputs, combined_inputs, combined_params
384
+ return (
385
+ tuple(combined_outputs),
386
+ combined_inputs,
387
+ combined_params,
388
+ )
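
Editor's note: with the new `_combine_raw_data` helper, a comparison run's combined result now carries raw data as well: every attribute of the combined `RawData` holds a list with one entry per underlying result. A rough sketch of the resulting shape, using a hypothetical `correlation_data` attribute with placeholder string values:

```python
from validmind.vm_models.result import RawData

# Two per-result RawData objects with the same (hypothetical) attribute...
raw_a = RawData(correlation_data="correlations_for_dataset_a")
raw_b = RawData(correlation_data="correlations_for_dataset_b")

# ...combine into a single RawData whose attribute is a list of the per-result
# values, mirroring what _combine_raw_data would build from two TestResults.
combined = RawData(
    correlation_data=["correlations_for_dataset_a", "correlations_for_dataset_b"]
)
```
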
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py CHANGED
@@ -52,16 +52,10 @@ def FeatureTargetCorrelationPlot(dataset, fig_height=600):
52
52
  - Not apt for models that employ complex feature interactions, like Decision Trees or Neural Networks, as the test
53
53
  may not accurately reflect their importance.
54
54
  """
55
-
56
- # Filter DataFrame based on features and target_column
57
55
  df = dataset.df[dataset.feature_columns + [dataset.target_column]]
58
56
 
59
- fig = _visualize_feature_target_correlation(df, dataset.target_column, fig_height)
60
-
61
- correlations = (
62
- df.corr(numeric_only=True)[dataset.target_column]
63
- .drop(dataset.target_column)
64
- .to_frame()
57
+ fig, correlations = _visualize_feature_target_correlation(
58
+ df, dataset.target_column, fig_height
65
59
  )
66
60
 
67
61
  return fig, RawData(correlation_data=correlations)
@@ -100,4 +94,5 @@ def _visualize_feature_target_correlation(df, target_column, fig_height):
100
94
  yaxis_title="",
101
95
  height=fig_height, # Adjust the height value as needed
102
96
  )
103
- return fig
97
+
98
+ return fig, correlations
validmind/tests/model_validation/ContextualRecall.py CHANGED
@@ -118,4 +118,4 @@ def ContextualRecall(dataset, model):
118
118
  # Create a DataFrame from all collected statistics
119
119
  result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
120
120
 
121
- return (result_df, *tuple(figures), RawData(contextual_recall_scores=metrics_df))
121
+ return (result_df, *figures, RawData(contextual_recall_scores=metrics_df))
validmind/tests/model_validation/MeteorScore.py CHANGED
@@ -117,4 +117,4 @@ def MeteorScore(dataset, model):
117
117
  # Create a DataFrame from all collected statistics
118
118
  result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
119
119
 
120
- return (result_df, *tuple(figures), RawData(meteor_scores=metrics_df))
120
+ return (result_df, *figures, RawData(meteor_scores=metrics_df))
validmind/tests/model_validation/ToxicityScore.py CHANGED
@@ -141,7 +141,7 @@ def ToxicityScore(dataset, model):
141
141
 
142
142
  return (
143
143
  result_df,
144
- *tuple(figures),
144
+ *figures,
145
145
  RawData(
146
146
  input_toxicity_df=input_df,
147
147
  true_toxicity_df=true_df,
validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py CHANGED
@@ -151,4 +151,4 @@ def StabilityAnalysisRandomNoise(
151
151
  mean_similarity_threshold,
152
152
  )
153
153
 
154
- return result, RawData(original_perturbed_similarity=raw_data)
154
+ return *result, RawData(original_perturbed_similarity=raw_data)
validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py CHANGED
@@ -107,4 +107,4 @@ def StabilityAnalysisSynonyms(
107
107
  mean_similarity_threshold,
108
108
  )
109
109
 
110
- return result, RawData(original_perturbed_similarity=raw_data)
110
+ return *result, RawData(original_perturbed_similarity=raw_data)
validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py CHANGED
@@ -134,4 +134,4 @@ def StabilityAnalysisTranslation(
134
134
  mean_similarity_threshold,
135
135
  )
136
136
 
137
- return result, RawData(original_perturbed_similarity=raw_data)
137
+ return *result, RawData(original_perturbed_similarity=raw_data)
validmind/tests/model_validation/embeddings/utils.py CHANGED
@@ -3,7 +3,6 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import numpy as np
6
- import pandas as pd
7
6
  import plotly.express as px
8
7
  from sklearn.metrics.pairwise import cosine_similarity
9
8
 
@@ -18,14 +17,12 @@ def create_stability_analysis_result(
18
17
  original_embeddings, perturbed_embeddings
19
18
  ).diagonal()
20
19
 
21
- # create a raw dataframe of the original, perturbed and similarity
22
- raw_data = pd.DataFrame(
23
- {
24
- "original": original_embeddings,
25
- "perturbed": perturbed_embeddings,
26
- "similarity": similarities,
27
- }
28
- )
20
+ # Store raw data in a dictionary
21
+ raw_data = {
22
+ "original_embeddings": original_embeddings,
23
+ "perturbed_embeddings": perturbed_embeddings,
24
+ "similarities": similarities,
25
+ }
29
26
 
30
27
  mean = np.mean(similarities)
31
28
  passed = mean > mean_similarity_threshold
validmind/tests/model_validation/ragas/utils.py CHANGED
@@ -4,24 +4,25 @@
4
4
 
5
5
  import os
6
6
 
7
- from validmind.ai.utils import get_client_and_model
8
- from validmind.client_config import client_config
7
+ from validmind.ai.utils import get_client_and_model, is_configured
9
8
 
10
9
  EMBEDDINGS_MODEL = "text-embedding-3-small"
11
10
 
12
11
 
13
12
  def get_ragas_config():
14
- if not client_config.can_generate_llm_test_descriptions():
15
- raise ValueError(
16
- "LLM based descriptions are not enabled in the current configuration."
17
- )
18
-
19
13
  # import here since its an optional dependency
20
14
  try:
21
15
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
22
16
  except ImportError:
23
17
  raise ImportError("Please run `pip install validmind[llm]` to use LLM tests")
24
18
 
19
+ if not is_configured():
20
+ raise ValueError(
21
+ "LLM is not configured. Please set an `OPENAI_API_KEY` environment variable "
22
+ "or ensure that you are connected to the ValidMind API and ValidMind AI is "
23
+ "enabled for your account."
24
+ )
25
+
25
26
  client, model = get_client_and_model()
26
27
  os.environ["OPENAI_API_BASE"] = str(client.base_url)
27
28
 
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py CHANGED
@@ -62,18 +62,14 @@ def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float
62
62
  lb = LabelBinarizer()
63
63
  lb.fit(y_true)
64
64
 
65
- y_true_binarized = lb.transform(y_true)
66
- y_score_binarized = lb.transform(dataset.y_pred(model))
67
-
68
65
  roc_auc = roc_auc_score(
69
- y_true=y_true_binarized,
70
- y_score=y_score_binarized,
66
+ y_true=lb.transform(y_true),
67
+ y_score=lb.transform(dataset.y_pred(model)),
71
68
  average="macro",
72
69
  )
73
70
 
74
71
  else:
75
- y_score_prob = dataset.y_prob(model)
76
- roc_auc = roc_auc_score(y_true=y_true, y_score=y_score_prob)
72
+ roc_auc = roc_auc_score(y_true=y_true, y_score=dataset.y_prob(model))
77
73
 
78
74
  return [
79
75
  {
validmind/tests/model_validation/statsmodels/RegressionModelSummary.py CHANGED
@@ -45,17 +45,17 @@ def RegressionModelSummary(dataset: VMDataset, model: VMModel):
45
45
  - A high R-Squared or Adjusted R-Squared may not necessarily indicate a good model, especially in cases of
46
46
  overfitting.
47
47
  """
48
+ y_true = dataset.y
49
+ y_pred = dataset.y_pred(model)
50
+
48
51
  return [
49
52
  {
50
53
  "Independent Variables": dataset.feature_columns,
51
- "R-Squared": r2_score(dataset.y, dataset.y_pred(model)),
54
+ "R-Squared": r2_score(y_true, y_pred),
52
55
  "Adjusted R-Squared": adj_r2_score(
53
- dataset.y,
54
- dataset.y_pred(model),
55
- len(dataset.y),
56
- len(dataset.feature_columns),
56
+ y_true, y_pred, len(y_true), len(dataset.feature_columns)
57
57
  ),
58
- "MSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=True),
59
- "RMSE": mean_squared_error(dataset.y, dataset.y_pred(model), squared=False),
58
+ "MSE": mean_squared_error(y_true, y_pred, squared=True),
59
+ "RMSE": mean_squared_error(y_true, y_pred, squared=False),
60
60
  }
61
61
  ]
validmind/tests/prompt_validation/ai_powered_test.py CHANGED
@@ -4,8 +4,7 @@
4
4
 
5
5
  import re
6
6
 
7
- from validmind.ai.utils import get_client_and_model
8
- from validmind.client_config import client_config
7
+ from validmind.ai.utils import get_client_and_model, is_configured
9
8
 
10
9
  missing_prompt_message = """
11
10
  Cannot run prompt validation tests on a model with no prompt.
@@ -25,9 +24,11 @@ def call_model(
25
24
  system_prompt: str, user_prompt: str, temperature: float = 0.0, seed: int = 42
26
25
  ):
27
26
  """Call LLM with the given prompts and return the response"""
28
- if not client_config.can_generate_llm_test_descriptions():
27
+ if not is_configured():
29
28
  raise ValueError(
30
- "LLM based descriptions are not enabled for your organization."
29
+ "LLM is not configured. Please set an `OPENAI_API_KEY` environment variable "
30
+ "or ensure that you are connected to the ValidMind API and ValidMind AI is "
31
+ "enabled for your account."
31
32
  )
32
33
 
33
34
  client, model = get_client_and_model()
validmind/tests/run.py CHANGED
@@ -256,7 +256,7 @@ def _run_comparison_test(
256
256
  combined_outputs, combined_inputs, combined_params = combine_results(results)
257
257
 
258
258
  return build_test_result(
259
- outputs=tuple(combined_outputs),
259
+ outputs=combined_outputs,
260
260
  test_id=test_id,
261
261
  test_doc=test_doc,
262
262
  inputs=combined_inputs,
@@ -400,5 +400,9 @@ def run_test( # noqa: C901
400
400
 
401
401
 
402
402
  def print_env():
403
+ """Prints a log of the running environment for debugging.
404
+
405
+ Output includes: ValidMind Library version, operating system details, installed dependencies, and the ISO 8601 timestamp at log creation.
406
+ """
403
407
  e = _get_run_metadata()
404
408
  pprint.pp(e)
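
Editor's note: `print_env` itself is unchanged; the diff only adds the docstring above. For reference, a quick troubleshooting call looks like this:

```python
import validmind as vm

# Pretty-prints the running environment: library version, OS details,
# installed dependencies, and an ISO 8601 timestamp.
vm.print_env()
```
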
validmind/utils.py CHANGED
@@ -60,6 +60,19 @@ pylab.rcParams.update(params)
60
60
  logger = get_logger(__name__)
61
61
 
62
62
 
63
+ def parse_version(version: str) -> tuple[int, ...]:
64
+ """
65
+ Parse a semver version string into a tuple of major, minor, patch integers
66
+
67
+ Args:
68
+ version (str): The semantic version string to parse
69
+
70
+ Returns:
71
+ tuple[int, ...]: A tuple of major, minor, patch integers
72
+ """
73
+ return tuple(int(x) for x in version.split(".")[:3])
74
+
75
+
63
76
  def is_notebook() -> bool:
64
77
  """
65
78
  Checks if the code is running in a Jupyter notebook or IPython shell
@@ -110,6 +123,7 @@ class NumpyEncoder(json.JSONEncoder):
110
123
  self.is_numpy_ndarray: lambda obj: obj.tolist(),
111
124
  self.is_numpy_bool: lambda obj: bool(obj),
112
125
  self.is_pandas_timestamp: lambda obj: str(obj),
126
+ self.is_numpy_datetime64: lambda obj: str(obj),
113
127
  self.is_set: lambda obj: list(obj),
114
128
  self.is_quantlib_date: lambda obj: obj.ISO(),
115
129
  self.is_generic_object: self.handle_generic_object,
@@ -142,6 +156,9 @@ class NumpyEncoder(json.JSONEncoder):
142
156
  def is_pandas_timestamp(self, obj):
143
157
  return isinstance(obj, pd.Timestamp)
144
158
 
159
+ def is_numpy_datetime64(self, obj):
160
+ return isinstance(obj, np.datetime64)
161
+
145
162
  def is_set(self, obj):
146
163
  return isinstance(obj, set)
147
164
 
@@ -152,11 +169,12 @@ class NumpyEncoder(json.JSONEncoder):
152
169
  return isinstance(obj, object)
153
170
 
154
171
  def handle_generic_object(self, obj):
155
- return (
156
- obj.__str__()
157
- if type(obj).__dict__.get("__str__")
158
- else str(obj).split(".")[1].split(" ")[0]
159
- )
172
+ try:
173
+ if hasattr(obj, "__str__"):
174
+ return obj.__str__()
175
+ return obj.__class__.__name__
176
+ except Exception:
177
+ return str(type(obj).__name__)
160
178
 
161
179
  def encode(self, obj):
162
180
  obj = nan_to_none(obj)
@@ -177,6 +195,18 @@ class HumanReadableEncoder(NumpyEncoder):
177
195
  else obj.tolist()
178
196
  )
179
197
 
198
+ def default(self, obj):
199
+ if self.is_dataframe(obj):
200
+ return {
201
+ "type": str(type(obj)),
202
+ "preview": obj.head(5).to_dict(orient="list"),
203
+ "shape": f"{obj.shape[0]} rows x {obj.shape[1]} columns",
204
+ }
205
+ return super().default(obj)
206
+
207
+ def is_dataframe(self, obj):
208
+ return isinstance(obj, pd.DataFrame)
209
+
180
210
 
181
211
  def get_full_typename(o: Any) -> Any:
182
212
  """We determine types based on type names so we don't have to import
validmind/vm_models/result/result.py CHANGED
@@ -171,6 +171,7 @@ class TestResult(Result):
171
171
  metadata: Optional[Dict[str, Any]] = None
172
172
  _was_description_generated: bool = False
173
173
  _unsafe: bool = False
174
+ _client_config_cache: Optional[Any] = None
174
175
 
175
176
  def __post_init__(self):
176
177
  if self.ref_id is None:
@@ -329,13 +330,50 @@ class TestResult(Result):
329
330
 
330
331
  return VBox(widgets)
331
332
 
333
+ @classmethod
334
+ def _get_client_config(cls):
335
+ """Get the client config, loading it if not cached"""
336
+ if cls._client_config_cache is None:
337
+ api_client.reload()
338
+ cls._client_config_cache = api_client.client_config
339
+
340
+ if cls._client_config_cache is None:
341
+ raise ValueError(
342
+ "Failed to load client config: api_client.client_config is None"
343
+ )
344
+
345
+ if not hasattr(cls._client_config_cache, "documentation_template"):
346
+ raise ValueError(
347
+ "Invalid client config: missing documentation_template"
348
+ )
349
+
350
+ return cls._client_config_cache
351
+
352
+ def check_result_id_exist(self):
353
+ """Check if the result_id exists in any test block across all sections"""
354
+ client_config = self._get_client_config()
355
+
356
+ # Iterate through all sections
357
+ for section in client_config.documentation_template["sections"]:
358
+ blocks = section.get("contents", [])
359
+ # Check each block in the section
360
+ for block in blocks:
361
+ if (
362
+ block.get("content_type") == "test"
363
+ and block.get("content_id") == self.result_id
364
+ ):
365
+ return
366
+
367
+ logger.info(
368
+ f"Test driven block with result_id {self.result_id} does not exist in model's document"
369
+ )
370
+
332
371
  def _validate_section_id_for_block(
333
372
  self, section_id: str, position: Union[int, None] = None
334
373
  ):
335
374
  """Validate the section_id exits on the template before logging"""
336
- api_client.reload()
375
+ client_config = self._get_client_config()
337
376
  found = False
338
- client_config = api_client.client_config
339
377
 
340
378
  for section in client_config.documentation_template["sections"]:
341
379
  if section["id"] == section_id:
@@ -440,6 +478,9 @@ class TestResult(Result):
440
478
  unsafe (bool): If True, log the result even if it contains sensitive data
441
479
  i.e. raw data from input datasets
442
480
  """
481
+
482
+ self.check_result_id_exist()
483
+
443
484
  if not unsafe:
444
485
  for table in self.tables or []:
445
486
  check_for_sensitive_data(table.data, self._get_flat_inputs())
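
Editor's note: `check_result_id_exist` only logs; it does not block logging. A hedged sketch of what a caller sees when a result's ID is not referenced in the model's documentation template (the test ID below is illustrative):

```python
# Assuming `result` is a TestResult whose result_id is not referenced by any
# test-driven block in the documentation template, log() still succeeds but an
# info-level message like the following is emitted:
#   Test driven block with result_id validmind.data_validation.Duplicates
#   does not exist in model's document
result.log()
```
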
{validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: validmind
3
- Version: 2.7.8
3
+ Version: 2.8.10
4
4
  Summary: ValidMind Library
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -15,8 +15,8 @@ Provides-Extra: all
15
15
  Provides-Extra: huggingface
16
16
  Provides-Extra: llm
17
17
  Provides-Extra: pytorch
18
- Provides-Extra: r-support
19
18
  Requires-Dist: aiohttp[speedups]
19
+ Requires-Dist: anywidget (>=0.9.13,<0.10.0)
20
20
  Requires-Dist: arch
21
21
  Requires-Dist: bert-score (>=0.3.13)
22
22
  Requires-Dist: catboost
@@ -42,7 +42,6 @@ Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
42
42
  Requires-Dist: python-dotenv
43
43
  Requires-Dist: ragas (>=0.2.3) ; extra == "all" or extra == "llm"
44
44
  Requires-Dist: rouge (>=1)
45
- Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
46
45
  Requires-Dist: scikit-learn (<1.6.0)
47
46
  Requires-Dist: scipy
48
47
  Requires-Dist: scorecardpy (>=0.1.9.6,<0.2.0.0)
@@ -53,6 +52,7 @@ Requires-Dist: shap (==0.44.1)
53
52
  Requires-Dist: statsmodels
54
53
  Requires-Dist: tabulate (>=0.8.9,<0.9.0)
55
54
  Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
55
+ Requires-Dist: tiktoken
56
56
  Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
57
57
  Requires-Dist: tqdm
58
58
  Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
@@ -131,6 +131,7 @@ The ValidMind Library has optional dependencies that can be installed separately
131
131
  - **R Models**: To use R models with the ValidMind Library, install the `r` extra:
132
132
 
133
133
  ```bash
134
- pip install validmind[r-support]
134
+ pip install validmind
135
+ pip install rpy2
135
136
  ```
136
137
 
{validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/RECORD CHANGED
@@ -1,13 +1,8 @@
1
- validmind/__init__.py,sha256=7qIV3pfZ9K4Gnq11Nkm8TutNKdAmPtUY9DZ0N_OaKks,2738
2
- validmind/__version__.py,sha256=mR2b-4HRoh6LXNm4djBaMS7QTj4ABCzJ36BRjVHKPQU,22
3
- validmind/ai/test_descriptions.py,sha256=ZApQ9PGBP1PI6HgyRG7Dwfo1C8t_89WRhVv1-H-i0Y4,8438
4
- validmind/ai/test_result_description/config.yaml,sha256=E1gPd-uv-MzdrWZA_rP6LSk8pVmkYijx6v78hZ8ceL0,787
5
- validmind/ai/test_result_description/context.py,sha256=ebKulFMpXTDLqd6lOHAsG200GmLNnhnu7sMDnbo2Dhc,2339
6
- validmind/ai/test_result_description/image_processing.py,sha256=JNaO1zyM9293WWuyzUp1meQQbHuut0XN4kKUGzQTwYY,4061
7
- validmind/ai/test_result_description/system.jinja,sha256=BjMvZCC3UXEH8p3VPpnHtGjhnqnbNcEG2_kYZ_QZrgg,2358
8
- validmind/ai/test_result_description/user.jinja,sha256=CmqPQQiqdXjxtq47wFCZ-IT5csliWsRVM04psKxzXc4,689
1
+ validmind/__init__.py,sha256=4ukEysjRwf6X2RcK8_OVY-z5fTjnM4GQIiVERFyzRPY,4194
2
+ validmind/__version__.py,sha256=93aDjSnP93wggEKIISA3t5wll8Pc68HgFa7UDIxqdwo,23
3
+ validmind/ai/test_descriptions.py,sha256=VQ2LaWXvSEQZ8569TN9DYDfgDLlMv96wIjUb9MGJlHk,6882
9
4
  validmind/ai/utils.py,sha256=YHqXtmACjcL5imDS9_nzmz8MhQJzK3VybcDXMbj1SbQ,4168
10
- validmind/api_client.py,sha256=fo2NbgdW5y0sUmpSLLRJ7pGs4eU8LvnjC3l-eZzSTSo,14842
5
+ validmind/api_client.py,sha256=II9dggnuZwagZQaTHMqCkfS9fKrMCZO0cF6at0kBO2w,15456
11
6
  validmind/client.py,sha256=lOv4lSZGDOUMxOa2FpNgAiT_GaEolffZTfvljewhl2I,18595
12
7
  validmind/client_config.py,sha256=CzbeS9GZrgAdx-6DssRK5XwEMOcujQVRgji6EtAzxYI,1358
13
8
  validmind/datasets/__init__.py,sha256=c0hQZN_6GrUEJxdFHdQaEsQrSYNABG84ZCY0H-PzOZk,260
@@ -64,7 +59,7 @@ validmind/datasets/regression/models/fred_loan_rates_model_2.pkl,sha256=J1ukMdeF
64
59
  validmind/datasets/regression/models/fred_loan_rates_model_3.pkl,sha256=IogZPcUQc1F_v11fR6KWT-nRt5JzvK5f7p4Hrw7vLps,40063
65
60
  validmind/datasets/regression/models/fred_loan_rates_model_4.pkl,sha256=cSxhpcrI4hCbxCwZwE2-nr7KObbWpDii3NzpECoXmmM,48292
66
61
  validmind/datasets/regression/models/fred_loan_rates_model_5.pkl,sha256=FkNLHq9xkPMbYks_vyMjFL371mw9SQYbP1iX9lY4Ljo,60343
67
- validmind/errors.py,sha256=QrobGZtTH7SC91pFHj2Yal11gVxX0np9W2fKBfVxn-E,8074
62
+ validmind/errors.py,sha256=_zM-CABfIGz6hirW4nuukyOgYkFCZ8xh55Jn5DSjd6g,8074
68
63
  validmind/html_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
64
  validmind/html_templates/content_blocks.py,sha256=vFMRS4Ogq4RZq88WzG3teNEOq3U4OLgLDzD3lBx4h-g,4050
70
65
  validmind/input_registry.py,sha256=8C_mrhgLT72hwbt_lo3ZwXb5NCyIcSuCQI1HdJ3bK2A,1042
@@ -76,7 +71,7 @@ validmind/models/huggingface.py,sha256=DMHekLpWi6c4N0svh-3G0NYYwzxPXOvqiU95M4Qff
76
71
  validmind/models/metadata.py,sha256=PMcdYuACkSPvuG8io5BhZeMwclQr_q79mXbvd1SC-7I,1665
77
72
  validmind/models/pipeline.py,sha256=nSskKWxaS4SGmx_B0IAvS5ogDZyh6tdx_aUkyxSXt88,2051
78
73
  validmind/models/pytorch.py,sha256=aAEUWtISwLh-PMvHkcLwBEbBStAByt4J-NpK-Ndv38E,1826
79
- validmind/models/r_model.py,sha256=eYdpCREgBpYv-PxJDuG91I77OOAx3-43FoaYT560ziE,7172
74
+ validmind/models/r_model.py,sha256=TPUwPmxz3cNzJ1bAA5vz6P9xS6deVcLTuIO1e7rD1vY,7306
80
75
  validmind/models/sklearn.py,sha256=lOCJlP2wvd5IJHtBS1XG9FXrtIvO_f8xm2Qp1UdsiBw,2406
81
76
  validmind/template.py,sha256=-j7UmM9v7I_VIZltWrmX5scbeDTbRDrR7hTQUzy6AVg,7307
82
77
  validmind/test_suites/__init__.py,sha256=wC_ZgVykFZAOrs1LTM7bE8r7mTSjxF54F9amUdT3nT4,6953
@@ -95,7 +90,7 @@ validmind/test_suites/time_series.py,sha256=3hzWc9gXHBf8aMecD-1YYGFS5RI1o8A-eor9
95
90
  validmind/tests/__init__.py,sha256=U6wUS7R8lYFjwUZmAkG7gC8Av9Z4TTDZS7uWZqbzxVM,1665
96
91
  validmind/tests/__types__.py,sha256=3if3CVI-YHWQpX0t_8WYh1a6dd01us0oP7r2ZsX2NX0,12589
97
92
  validmind/tests/_store.py,sha256=p1qBYOkdphd1aOWwnL6ybPfz3qSWJs2engLv-zFVl5A,2368
98
- validmind/tests/comparison.py,sha256=lqpaRXZVNqMYNf_mbtVXOGkEtygL4edAyvQDwX4nFf8,12488
93
+ validmind/tests/comparison.py,sha256=7LJFr_6sVVOAxLRX1LYylJ8oZF0UdrVV7tFfWIUfcWI,13336
99
94
  validmind/tests/data_validation/ACFandPACFPlot.py,sha256=Xo_xildBWlKJSbycH-IqXh5PkpBtEFym5WN_m9VKDwU,4276
100
95
  validmind/tests/data_validation/ADF.py,sha256=UHIYCJuqQYJTtOvdzj6ALJcLH8kCprr0PlfESIx_G_E,3969
101
96
  validmind/tests/data_validation/AutoAR.py,sha256=QQGezpGkYS0m0isT3BDhBAGMh8CS8ZU2HsU3G4p9lY0,5238
@@ -111,7 +106,7 @@ validmind/tests/data_validation/DescriptiveStatistics.py,sha256=l6Ne4y5A9HC2vlPs
111
106
  validmind/tests/data_validation/DickeyFullerGLS.py,sha256=8zR7I77082RfYOyOlmYX7OLURXJ8_G1Yh9Esb27FzFQ,4193
112
107
  validmind/tests/data_validation/Duplicates.py,sha256=HAEHRFwFZovJU-wBWea0KJREsJCd9R8jarazxJ3ZqI8,3219
113
108
  validmind/tests/data_validation/EngleGrangerCoint.py,sha256=kNBbxLYweF8qTF5JVRzcyXq3aKLhkN_1iv3mwwskTBU,4503
114
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py,sha256=6q-RYIC9hIk0mcKuIntpikYsO3UguCHIeeM8wXI1FAI,4441
109
+ validmind/tests/data_validation/FeatureTargetCorrelationPlot.py,sha256=IEQYOakooUJRLe1CghVk0u4RJdLZ3IA-nnYveqgKJVE,4281
115
110
  validmind/tests/data_validation/HighCardinality.py,sha256=Li6xa46crnmdx9XliuHbDDVH4_KzcUT4T7Z16cRYQnY,3546
116
111
  validmind/tests/data_validation/HighPearsonCorrelation.py,sha256=f8AtgstLTY5msFfDkCEe3hScHmVr8A2rL4OgdJSdkYQ,3779
117
112
  validmind/tests/data_validation/IQROutliersBarPlot.py,sha256=BrhqjiV5EpQAgPI_6GUdF5NLA3O5GbJk50dXbi-9G0U,5182
@@ -172,9 +167,9 @@ validmind/tests/load.py,sha256=UpMHEMolnwQ8z3EYNJvNj186H3JUgQvz_eRCBrngtBo,11003
172
167
  validmind/tests/model_validation/BertScore.py,sha256=R6jnQ9cDKbHpaJFtf2wTHkWtKX_DCRzNm6NkOmgGLJ8,5757
173
168
  validmind/tests/model_validation/BleuScore.py,sha256=xFPkmxVbM4ST3Bvi5IH9Sh_BJU_Civ-Slh66AoZgRec,5134
174
169
  validmind/tests/model_validation/ClusterSizeDistribution.py,sha256=mZcLjbHjGJ4ltt5juyuGRC5Fm7oqDB6ZPBxCvd1-h_k,3284
175
- validmind/tests/model_validation/ContextualRecall.py,sha256=maDskSjpMFDe7wbQIMS5TK_fG04I9QY3oM63kRgEhhE,5263
170
+ validmind/tests/model_validation/ContextualRecall.py,sha256=0W1xkiDff09ql7x9N0JW4qafCKyLdCttbbybDTJE9pQ,5256
176
171
  validmind/tests/model_validation/FeaturesAUC.py,sha256=LWURhngE04h3r_UA0JQIyoHMYsrJTRQHHlmK2UnR0P4,4010
177
- validmind/tests/model_validation/MeteorScore.py,sha256=0AhyCHy4eYJvWduq9JY0RRtbv4rz19OcsEWCdTdHhmQ,5253
172
+ validmind/tests/model_validation/MeteorScore.py,sha256=PXLfyZ1tFDoYfZLniAxOpfSRJDFvGdaUm5LKH_5XotA,5246
178
173
  validmind/tests/model_validation/ModelMetadata.py,sha256=UiDANMTqAy0DURnnTzImYNS-3Z8sE4yFjg1c2S5YFS8,2521
179
174
  validmind/tests/model_validation/ModelPredictionResiduals.py,sha256=mZmE0RWTUHigJJ3EvrzWcHjb5nvePYGH2PvZTQl2ZNc,3524
180
175
  validmind/tests/model_validation/RegardScore.py,sha256=cE3BmI1SQ3rlRbrREio8mooaG2BQ0BC_AHMFexbJ3Xk,5562
@@ -184,7 +179,7 @@ validmind/tests/model_validation/TimeSeriesPredictionWithCI.py,sha256=JanMDaf-76
184
179
  validmind/tests/model_validation/TimeSeriesPredictionsPlot.py,sha256=zoxONIl_a_mfOLuxnfVEMNQ3jL72pp5J4BDAQP5e-P0,2280
185
180
  validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py,sha256=JIyER-vwXnQyHdxmpCULYm4iMkB_LaLHHQuzyygumbw,4000
186
181
  validmind/tests/model_validation/TokenDisparity.py,sha256=_gn-LSghJa8hBtCKxfNKw7rAC8h1uyrFhcX8BIaPoUs,4422
187
- validmind/tests/model_validation/ToxicityScore.py,sha256=0XjxALRwXh_gM-UwCXpAopgE-ZsgNQOlxbWOgef01Kw,5472
182
+ validmind/tests/model_validation/ToxicityScore.py,sha256=s-EPOn6Apo7dD_bw0wcmTez3P-lv6hfLV61UHuLMsgg,5465
188
183
  validmind/tests/model_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
184
  validmind/tests/model_validation/embeddings/ClusterDistribution.py,sha256=q0v88QINg5BQWuFbQrvVb_lP-lLEQt_h1f3Af4TX6Bc,3056
190
185
  validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py,sha256=Xtycolawy2Z7hhShfdlLpYvpc1lgQrISCL75ltr8s1g,5138
@@ -196,11 +191,11 @@ validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py,sha25
196
191
  validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py,sha256=TvB2yi5C1OXtCgiXy4z9CBGbUiNEwEVzgG69gmpb3hQ,3393
197
192
  validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py,sha256=lF4QC5Ueihxt-ON1GyUTwVJ9_qPizbtvt9OoPt39Gi4,3919
198
193
  validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py,sha256=aGsE1CfpfnNID631H15hnwAXtiDoy-HooIBMNuu8am4,4161
199
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py,sha256=YjDEVZNHuyQuJ2xC5e7fEkIlxa4Dj02z7SZpRr3yhes,5763
200
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py,sha256=5urEK4DAC8h8aKKDLrVF7xyF75dZxt0x3zYoejWdyZk,4651
201
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=DM7HrUKhqVu0oXsrxz4KG1yM6u6b6wEdvMyPwotk0hM,5759
194
+ validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py,sha256=pDN9aAjPNsHwo94biDtGBBJk7Daqv9UELIu8lCA_ZAs,5764
195
+ validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py,sha256=SsFgUYPG8M8RbpcQKZzlx6aG4dG9yWK1zW3fgK52OUc,4652
196
+ validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=NDQeTt2Y3r241y3pUYhnkKtAhkOJZmE-8O6xxcW55dI,5760
202
197
  validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py,sha256=roCTA0QXU8GJoQIQ9X-FKeQ2LjBGwyH5IJTdZFnreRU,4645
203
- validmind/tests/model_validation/embeddings/utils.py,sha256=jVfPEwJBvtDMU5pC1zK7uEwQNNZd2hmMrZ3HP8jRaN8,1970
198
+ validmind/tests/model_validation/embeddings/utils.py,sha256=Hr8jpVB0YfaOEYsO_tiwhU1UgXoJFHHlRqFcHDNXHoU,1896
204
199
  validmind/tests/model_validation/ragas/AnswerCorrectness.py,sha256=P1-sAWe4KGwgwJ0F44BY9Nd1wvCmaftG6tRZwsi9ero,5496
205
200
  validmind/tests/model_validation/ragas/AspectCritic.py,sha256=KsETxsVxSYAxAMbxyssL-c1AU0qUOFMz7I5D2pu-fY0,6936
206
201
  validmind/tests/model_validation/ragas/ContextEntityRecall.py,sha256=dQCBtiq4BF-C2covBaNxU0Ok_FYTDEN3M0U6ynwsAq0,5328
@@ -211,7 +206,7 @@ validmind/tests/model_validation/ragas/Faithfulness.py,sha256=SZCrgzfpcatfva9sYL
211
206
  validmind/tests/model_validation/ragas/NoiseSensitivity.py,sha256=oAcwDhXDEYLT0q3e8quHuYWdp7OJg9ycBvu05k6u6JI,6506
212
207
  validmind/tests/model_validation/ragas/ResponseRelevancy.py,sha256=vsfhXy-DBtIWEGsJZeR9xcE2V6-eLR2ui-qO1-96BLQ,5588
213
208
  validmind/tests/model_validation/ragas/SemanticSimilarity.py,sha256=wjEkCikFLweTdz3Tg5CzikWqusdSsWMwaD_4yntBCb4,4820
214
- validmind/tests/model_validation/ragas/utils.py,sha256=VCc3NcNLIwrYQ7RvuJ1ev4XhI86TKDVNzI8o12aHFHc,3363
209
+ validmind/tests/model_validation/ragas/utils.py,sha256=EV3aHvWMOgVs6EIRS0nfOa7BRwpF-eSqlOQfsoZFTC8,3429
215
210
  validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py,sha256=5kk-etKeA7jfo6twQ4JVPEuNvWh0TBhhXUQL7SkrrWM,2858
216
211
  validmind/tests/model_validation/sklearn/AdjustedRandIndex.py,sha256=NCUM80CIFrV4Qm0P0wxMdf20y-BwLnPEJxOiPtv1eGk,2706
217
212
  validmind/tests/model_validation/sklearn/CalibrationCurve.py,sha256=-chYMKediopXV7YVNuaGhtb-RpIdgEsEpL1iy8aAgp4,4159
@@ -228,7 +223,7 @@ validmind/tests/model_validation/sklearn/HyperParametersTuning.py,sha256=Z7ibpCy
228
223
  validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py,sha256=wisUO4461arN6YPQ2lazbgnI2cOVjbSiDObt3T8IHYY,5279
229
224
  validmind/tests/model_validation/sklearn/MinimumAccuracy.py,sha256=2FVtoEMUJJYUxDW6WwC5agAojtt7FUnO7nwcVaqPKao,2773
230
225
  validmind/tests/model_validation/sklearn/MinimumF1Score.py,sha256=CBOGD_wCqcHgMbKfp5TGTc1H8mJoG3RwMRSOUFHVGDc,3069
231
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py,sha256=LyL2fde8pDcKozSUcDOd_IQF3AI8Z6jaq-wxL3guw2k,3624
226
+ validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py,sha256=rNPD-qxdFKuUs8SWa0yIFIjjt8svPeXWNKWQrLy8uJc,3497
232
227
  validmind/tests/model_validation/sklearn/ModelParameters.py,sha256=CF3cZGJLxiABnf1CQ_u_iX_ylgvpElH3jF2DBXbXZJY,3060
233
228
  validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py,sha256=wDxGUXgfzLA80wfjoRz7CzHO8NiQfuJyxIfuVFOuLYA,4658
234
229
  validmind/tests/model_validation/sklearn/OverfitDiagnosis.py,sha256=tpPf4lL_mfpMne6ei80QEYUZd1ual8w4SbXTKS_0nfY,9960
@@ -261,7 +256,7 @@ validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py,sh
261
256
  validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py,sha256=XpzPACbdKkjP5egxESDUYb7aCZ8_VmJpMHCy3joEHmA,3648
262
257
  validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py,sha256=4khg8CsgMWAQZVDOqky7_gcPqDS19d19n0FTTbh1cGM,3127
263
258
  validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py,sha256=UbtvxJho7yISQdsNcHOjUZwBASTY6TQmIpwLddYEsYo,4704
264
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py,sha256=ohX9OEVdofir7oK1rAKxbSImaxcsx_ZvjBk33piQAeA,2419
259
+ validmind/tests/model_validation/statsmodels/RegressionModelSummary.py,sha256=3ljShlVkblJ2RNKSaTvsJ9QtdQGac-0ZcP1ne1bq94U,2354
265
260
  validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py,sha256=04vCwLMDUguS0z0Krbk9ubzrF5HVq0GGe9JqvKA7qQ4,3915
266
261
  validmind/tests/model_validation/statsmodels/ScorecardHistogram.py,sha256=0hnB6icasRKT_Cl0YxMEpIuaUKgi5scXHmV_nP9RmkI,4650
267
262
  validmind/tests/model_validation/statsmodels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -290,8 +285,8 @@ validmind/tests/prompt_validation/NegativeInstruction.py,sha256=qGh_hI21zBfffSjD
290
285
  validmind/tests/prompt_validation/Robustness.py,sha256=4FShTFGzHcfg9ePfd_lbVt28MOmvlQlL7lF2VZALO2U,5695
291
286
  validmind/tests/prompt_validation/Specificity.py,sha256=nBKoBvE4vFisXNZX085h0rJN_m1cS9c05J4Ku53tuwQ,4790
292
287
  validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
293
- validmind/tests/prompt_validation/ai_powered_test.py,sha256=Lc8WU-rJ50e_NbLGV3YZ-W9t6Vj2T-o7hMxZbUrv3pw,2229
294
- validmind/tests/run.py,sha256=sO1YUFcM7Ng3yuVdC9cOR-7iOqmJF5EiE7gfuAateZg,13497
288
+ validmind/tests/prompt_validation/ai_powered_test.py,sha256=pVmJPZVw1t0a3AMcGvBYEDR_sqI1Yu0AZjbshqBUVIg,2302
289
+ validmind/tests/run.py,sha256=eIdR8If94ctaY25sWfygsLRi_0XoeJjqSZfdivocKU8,13703
295
290
  validmind/tests/test_providers.py,sha256=BceVuM_-bfQ4Zp-a5wwcP_wHeM6IOUpPIq1-MeT2-VY,6250
296
291
  validmind/tests/utils.py,sha256=mQuf1qgewPiE_pFN8iOoPSCGdyFqb4jbMFBVN3S3S2o,3526
297
292
  validmind/unit_metrics/__init__.py,sha256=lXeTJh8uq0TBRQHDBVhzKiHoV2eG9xOkHkI_pDXnkPU,952
@@ -311,7 +306,7 @@ validmind/unit_metrics/regression/MeanSquaredError.py,sha256=h-zgtlR3aigQwMGbi55
311
306
  validmind/unit_metrics/regression/QuantileLoss.py,sha256=rs0m9w4zIL6daQOHqYY-sEeQs6SDTpd0t3cN_KFZyqA,518
312
307
  validmind/unit_metrics/regression/RSquaredScore.py,sha256=z8-E-KSewvma9nu1OSUv97IfmFLpV5-rOq15jtlxklg,459
313
308
  validmind/unit_metrics/regression/RootMeanSquaredError.py,sha256=uIDsSpy75Z7W3zu4LditvW3mPJIkGxf-PdFQ7szWBZU,603
314
- validmind/utils.py,sha256=HXau6k_NkG4gYdtVQEn2h7P9mo0RzPpRWAKZUTOzXGk,18531
309
+ validmind/utils.py,sha256=3T0uv_6cH0T_48utYvz9xWFPl-5oa0E74RxG5FsreJc,19470
315
310
  validmind/vm_models/__init__.py,sha256=lcqf9q2aRzrVrNN6R--81IkrnSa6BXPbhJ8SnkT_hcI,702
316
311
  validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
317
312
  validmind/vm_models/dataset/dataset.py,sha256=F6_rc5pjccRLnB7UcIMiGMbD-qMVUW5v4TnZTNSXTbo,26370
@@ -321,14 +316,14 @@ validmind/vm_models/input.py,sha256=qLdqz_bktr4v0YcPha2vFdDvmkC-btT1pH9zBIkt1OY,
321
316
  validmind/vm_models/model.py,sha256=PRNyrnKihIRtbYt4idLPHf8OCij71Vgc5Xug_oVZfBg,6486
322
317
  validmind/vm_models/result/__init__.py,sha256=Bs5GMGDxiTsxlwCdqxz5LmGkY0_fM6-_0-3tWSRoqps,341
323
318
  validmind/vm_models/result/result.jinja,sha256=Yvovwm5gInCBukFRlvJXNlDIUpl2eFz4dz1lS3Sn_Gc,311
324
- validmind/vm_models/result/result.py,sha256=kMyr_7qOgo30ZW87MomIm-ck5d8Ph2Kx4cUE2hh-EEM,13784
319
+ validmind/vm_models/result/result.py,sha256=McmQnFiSUL4zXxB1u2-saoCkTxPMUTQZEtk3ZpmpyEg,15266
325
320
  validmind/vm_models/result/utils.py,sha256=t6g-g1fJ3SU9lHqC1kMeozMkrUnfOMwYAep3Z5XFXNo,5122
326
321
  validmind/vm_models/test_suite/runner.py,sha256=Cpl9WKwHzJD5Zvrh71FzbEhGZkHM0x0MSd4PIwdOLDQ,5427
327
322
  validmind/vm_models/test_suite/summary.py,sha256=Ug3nMvpPL2DSTDujWagWMCrFiW9oDy0AqJL_zXN8pH0,4642
328
323
  validmind/vm_models/test_suite/test.py,sha256=uImjmPlBlLrlVPavsUzbaDK55bvpOn3PuFyWeyYyTac,3908
329
324
  validmind/vm_models/test_suite/test_suite.py,sha256=5Jppt2UXSMgvJ6FO5LIAKA4oN_-hh9SMr8APAFJzk9g,5080
330
- validmind-2.7.8.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
331
- validmind-2.7.8.dist-info/METADATA,sha256=rOtVljPIbyIxyShQ4GSOUUfq_Bhqo8eJsxFkj3DDyrM,6084
332
- validmind-2.7.8.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
333
- validmind-2.7.8.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
334
- validmind-2.7.8.dist-info/RECORD,,
325
+ validmind-2.8.10.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
326
+ validmind-2.8.10.dist-info/METADATA,sha256=MJDPX24c6xbCA6iNdyA2096lGY2rm6ip1KfBkWLd12A,6058
327
+ validmind-2.8.10.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
328
+ validmind-2.8.10.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
329
+ validmind-2.8.10.dist-info/RECORD,,
{validmind-2.7.8.dist-info → validmind-2.8.10.dist-info}/WHEEL CHANGED
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.0.1
2
+ Generator: poetry-core 2.1.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
validmind/ai/test_result_description/config.yaml DELETED
@@ -1,29 +0,0 @@
1
- id: test_result_description
2
- name: Test Result Description
3
- description: Generate a description for a test result
4
- version: 0.1.0
5
- model: gpt-4o
6
- temperature: 0.0
7
- output_type: markdown
8
- prompts:
9
- system:
10
- role: system
11
- path: system.jinja
12
- user:
13
- role: user
14
- path: user.jinja
15
- inputs:
16
- test_name:
17
- description: The name of the test that produced the result (usually the last part of the test ID)
18
- type: string
19
- test_description:
20
- description: The description (docstring) of the test that was run
21
- type: string
22
- summary:
23
- description: The json result summary (i.e. the table(s) returned by the test)
24
- type: list
25
- optional: true
26
- figures:
27
- description: A list of base64 encoded images of the figures returned by the test
28
- type: list
29
- optional: true
validmind/ai/test_result_description/context.py DELETED
@@ -1,73 +0,0 @@
1
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
- # See the LICENSE file in the root of this repository for details.
3
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
-
5
- import multiprocessing
6
-
7
- MIN_IMAGES_FOR_PARALLEL = 4
8
- MAX_WORKERS = multiprocessing.cpu_count()
9
-
10
-
11
- def parallel_downsample_images(base64_strings):
12
- import os
13
- import sys
14
-
15
- sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
16
- from test_result_description.image_processing import (
17
- downsample_image, # type: ignore
18
- )
19
-
20
- num_images = len(base64_strings)
21
-
22
- if num_images < MIN_IMAGES_FOR_PARALLEL:
23
- return [downsample_image(img) for img in base64_strings]
24
-
25
- num_workers = min(num_images, MAX_WORKERS)
26
-
27
- with multiprocessing.Pool(processes=num_workers) as pool:
28
- results = pool.map(downsample_image, base64_strings)
29
-
30
- sys.path.pop(0)
31
-
32
- return results
33
-
34
-
35
- class Context:
36
- def __init__(self, mode="local"):
37
- pass
38
-
39
- def load(self, input_data):
40
- # this task can accept a dict or a test result object from the ValidMind Library
41
- if isinstance(input_data, dict):
42
- return input_data
43
-
44
- # we are likely running outside of the ValidMind Library and need to convert
45
- # the test result object to a dictionary
46
- test_result = input_data
47
-
48
- try:
49
- from markdownify import markdownify as md
50
- except ImportError as e:
51
- raise ImportError(
52
- "Failed to import markdownify. Please install the package to use this task."
53
- ) from e
54
-
55
- input_data = {
56
- "test_name": test_result.result_id.split(".")[-1],
57
- "test_description": md(test_result.result_metadata[0]["text"]),
58
- }
59
-
60
- if hasattr(test_result, "metric") and test_result.metric.summary is not None:
61
- input_data["summary"] = test_result.metric.summary.serialize()
62
- elif (
63
- hasattr(test_result, "test_results")
64
- and test_result.test_results.summary is not None
65
- ):
66
- input_data["summary"] = test_result.test_results.summary.serialize()
67
-
68
- if test_result.figures:
69
- input_data["figures"] = parallel_downsample_images(
70
- [figure._get_b64_url() for figure in test_result.figures]
71
- )
72
-
73
- return input_data
validmind/ai/test_result_description/image_processing.py DELETED
@@ -1,124 +0,0 @@
1
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
- # See the LICENSE file in the root of this repository for details.
3
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
-
5
- import base64
6
- import io
7
-
8
- import numpy as np
9
- from PIL import Image, ImageEnhance, ImageFilter
10
-
11
- DOWNSAMPLE_PERCENTAGE = 50
12
-
13
-
14
- def open_base64_image(base64_string):
15
- if base64_string.startswith("data:image/png;base64,"):
16
- base64_string = base64_string.split(",")[1]
17
-
18
- image_data = base64.b64decode(base64_string)
19
- image_buffer = io.BytesIO(image_data)
20
- image = Image.open(image_buffer)
21
-
22
- return image
23
-
24
-
25
- def downsample_image(base64_string):
26
- image = open_base64_image(base64_string)
27
-
28
- # Calculate the target dimensions based on the reduction percentage
29
- target_width = int(image.width * (1 - DOWNSAMPLE_PERCENTAGE / 100))
30
- target_height = int(image.height * (1 - DOWNSAMPLE_PERCENTAGE / 100))
31
-
32
- # If the image is already smaller than the target size, return the original
33
- if image.width <= target_width and image.height <= target_height:
34
- return base64_string
35
-
36
- # remove any margins from the image
37
- # Find the bounding box of non-uniform pixels (margin detection)
38
- width, height = image.size
39
- background = image.getpixel((0, 0)) # Assume top-left pixel is background color
40
-
41
- def is_different(pixel):
42
- return pixel != background
43
-
44
- left = next(
45
- x
46
- for x in range(width)
47
- if any(is_different(image.getpixel((x, y))) for y in range(height))
48
- )
49
- right = next(
50
- x
51
- for x in range(width - 1, -1, -1)
52
- if any(is_different(image.getpixel((x, y))) for y in range(height))
53
- )
54
- top = next(
55
- y
56
- for y in range(height)
57
- if any(is_different(image.getpixel((x, y))) for x in range(width))
58
- )
59
- bottom = next(
60
- y
61
- for y in range(height - 1, -1, -1)
62
- if any(is_different(image.getpixel((x, y))) for x in range(width))
63
- )
64
-
65
- # Crop the image to remove the uniform margin (with some padding)
66
- bbox = (left - 5, top - 5, right + 6, bottom + 6)
67
- image = image.crop(bbox)
68
-
69
- # If the image has an alpha channel, remove any transparent margins
70
- if image.mode in ("RGBA", "LA"):
71
- alpha = image.getchannel("A")
72
- bbox = alpha.getbbox()
73
- if bbox:
74
- image = image.crop(bbox)
75
-
76
- # Apply unsharp mask to enhance edges
77
- image = image.filter(ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3))
78
-
79
- # Calculate new dimensions
80
- aspect_ratio = image.width / image.height
81
- new_height = target_height
82
- new_width = int(new_height * aspect_ratio)
83
-
84
- # print(f"downsampling from {width}x{height} to {new_width}x{new_height}")
85
-
86
- # Ensure we don't exceed the target width
87
- if new_width > target_width:
88
- new_width = target_width
89
- new_height = int(new_width / aspect_ratio)
90
-
91
- # print(f"downsampling from {image.width}x{image.height} to {new_width}x{new_height}")
92
-
93
- # Convert to numpy array for custom downsampling
94
- img_array = np.array(image)
95
-
96
- # Optimized area interpolation
97
- h_factor = img_array.shape[0] / new_height
98
- w_factor = img_array.shape[1] / new_width
99
-
100
- h_indices = (np.arange(new_height).reshape(-1, 1) * h_factor).astype(int)
101
- w_indices = (np.arange(new_width).reshape(1, -1) * w_factor).astype(int)
102
-
103
- h_indices = np.minimum(h_indices, img_array.shape[0] - 1)
104
- w_indices = np.minimum(w_indices, img_array.shape[1] - 1)
105
-
106
- # Convert back to PIL Image
107
- image = Image.fromarray(img_array[h_indices, w_indices].astype(np.uint8))
108
-
109
- # Enhance contrast slightly
110
- enhancer = ImageEnhance.Contrast(image)
111
- image = enhancer.enhance(1.2)
112
-
113
- # Sharpen the image
114
- image = image.filter(ImageFilter.SHARPEN)
115
-
116
- # Convert the image to bytes in PNG format
117
- buffered = io.BytesIO()
118
- image.save(buffered, format="PNG")
119
- img_bytes = buffered.getvalue()
120
-
121
- # Encode the bytes to base64
122
- b64_encoded = base64.b64encode(img_bytes).decode("utf-8")
123
-
124
- return f"data:image/png;base64,{b64_encoded}"
validmind/ai/test_result_description/system.jinja DELETED
@@ -1,39 +0,0 @@
1
- You are an expert data scientist and MRM specialist.
2
- You are tasked with analyzing the results of a quantitative test run on some model or dataset.
3
- Your goal is to create a test description that will act as part of the model documentation.
4
- You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
5
- The overarching theme to maintain is MRM documentation.
6
-
7
- Examine the provided statistical test results and compose a description of the results.
8
- The results are either in the form of serialized tables or images of plots.
9
- Compose a description and interpretation of the result to accompany it in MRM documentation.
10
- It will be read by other data scientists and developers and by validators and stakeholders.
11
-
12
- Use valid Markdown syntax to format the response.
13
- Avoid long sentences and complex vocabulary.
14
- Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
15
- Structure the response clearly and logically.
16
- Respond only with your analysis and insights, not the verbatim test results.
17
- Respond only with the markdown content, no explanation or context for your response is necessary.
18
- Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
19
-
20
- Explain the test, its purpose, its mechanism/formula etc and why it is useful.
21
- If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
22
- Highlight the key insights from the test results. The key insights should be concise and easily understood.
23
- An insight should only be included if it is something not entirely obvious from the test results.
24
- End the response with any closing remarks, summary or additional useful information.
25
-
26
- Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
27
-
28
- <ResponseFormat>
29
- **<Test Name>** calculates the xyz <continue to explain what it does in detail>...
30
-
31
- This test is useful for <explain why and for what this test is useful>...
32
-
33
- **Key Insights:**
34
-
35
- The following key insights can be identified in the test results:
36
-
37
- - **<key insight 1 - title>**: <concise explanation of key insight 1>
38
- - ...<continue with any other key insights using the same format>
39
- </ResponseFormat>
validmind/ai/test_result_description/user.jinja DELETED
@@ -1,30 +0,0 @@
1
- **Test ID**: `{{ test_name }}`
2
-
3
- **Test Description**:
4
-
5
- {{ test_description }}
6
-
7
- ---
8
-
9
- Generate a description of the following result of the test using the instructions given in your system prompt.
10
-
11
- {%- if context %}
12
- **Context**:
13
- {{ context }}
14
- {%- endif %}
15
-
16
- {%- if summary %}
17
- **Test Result Tables** *(Raw Data)*:
18
- {{ summary }}
19
- {%- endif %}
20
-
21
- {%- if figures %}
22
- The following images make up the results of the test.
23
- {%- for b64_image_url in figures %}
24
- [[IMAGE:{{ b64_image_url }}]]
25
- {%- endfor %}
26
- {%- endif %}
27
-
28
- Keep your response concise and to the point!
29
- Only include content in your response if its something truly insightful or interesting!
30
- DO NOT VERBOSELY EXPLAIN THE TEST OR THE RESULTS!!!