validmind-2.2.5-py3-none-any.whl → validmind-2.2.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
validmind/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "2.2.5"
+ __version__ = "2.2.6"
validmind/ai.py CHANGED
@@ -7,6 +7,11 @@ import os

  from openai import AzureOpenAI, OpenAI

+ from .logging import get_logger
+
+ logger = get_logger(__name__)
+
+
  SYSTEM_PROMPT = """
  You are an expert data scientist and MRM specialist.
  You are tasked with analyzing the results of a quantitative test run on some model or dataset.
@@ -19,6 +24,7 @@ This will act as the description and interpretation of the result in the model d
  It will be displayed alongside the test results table and figures.

  Avoid long sentences and complex vocabulary.
+ Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
  Structure the response clearly and logically.
  Use valid Markdown syntax to format the response.
  Respond only with your analysis and insights, not the verbatim test results.
@@ -28,9 +34,10 @@ Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" ->
  Explain the test, its purpose, its mechanism/formula etc and why it is useful.
  If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
  Highlight the key insights from the test results. The key insights should be concise and easily understood.
+ An insight should only be included if it is something not entirely obvious from the test results.
  End the response with any closing remarks, summary or additional useful information.

- Use the following format for the response (feel free to modify slightly if necessary):
+ Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):

  <ResponseFormat>
  **<Test Name>** calculates the xyz <continue to explain what it does in detail>...
@@ -73,12 +80,17 @@ The attached plots show the results of the test.
  __client = None
  __model = None

+ # can be None, True or False (ternary to represent initial state, ack and failed ack)
+ __ack = None
+
  __executor = concurrent.futures.ThreadPoolExecutor()


  def __get_client_and_model():
-     """
-     Get the model to use for generating interpretations
+     """Get the model and client to use for generating interpretations
+
+     On the first call, it looks up the API key, endpoint, model, etc. in the
+     environment and stores them in global variables to avoid loading them again.
      """
      global __client, __model

@@ -86,8 +98,10 @@ def __get_client_and_model():
          return __client, __model

      if "OPENAI_API_KEY" in os.environ:
-         __client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-         __model = os.environ.get("VM_OPENAI_MODEL", "gpt-4o")
+         __client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+         __model = os.getenv("VM_OPENAI_MODEL", "gpt-4o")
+
+         logger.debug(f"Using OpenAI {__model} for generating descriptions")

      elif "AZURE_OPENAI_KEY" in os.environ:
          if "AZURE_OPENAI_ENDPOINT" not in os.environ:
@@ -101,11 +115,13 @@ def __get_client_and_model():
          )

          __client = AzureOpenAI(
-             azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
-             api_key=os.environ.get("AZURE_OPENAI_KEY"),
-             api_version=os.environ.get("AZURE_OPENAI_VERSION", "2023-05-15"),
+             azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+             api_key=os.getenv("AZURE_OPENAI_KEY"),
+             api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
          )
-         __model = os.environ.get("AZURE_OPENAI_MODEL")
+         __model = os.getenv("AZURE_OPENAI_MODEL")
+
+         logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")

      else:
          raise ValueError("OPENAI_API_KEY or AZURE_OPENAI_KEY must be set")
@@ -126,12 +142,19 @@ class DescriptionFuture:
          self._future = future

      def get_description(self):
-         # This will block until the future is completed
-         return self._future.result()
+         from .utils import md_to_html
+
+         if isinstance(self._future, str):
+             description = self._future
+         else:
+             # This will block until the future is completed
+             description = self._future.result()
+
+         return md_to_html(description, mathml=True)


- def generate_description_async(
-     test_name: str,
+ def generate_description(
+     test_id: str,
      test_description: str,
      test_summary: str,
      figures: list = None,
@@ -140,14 +163,25 @@ def generate_description_async(
      if not test_summary and not figures:
          raise ValueError("No summary or figures provided - cannot generate description")

-     client, _ = __get_client_and_model()
+     client, model = __get_client_and_model()
      # get last part of test id
-     test_name = test_name.split(".")[-1]
+     test_name = test_id.split(".")[-1]
+     # truncate the test description to save time
+     test_description = (
+         f"{test_description[:500]}..."
+         if len(test_description) > 500
+         else test_description
+     )

      if test_summary:
+         logger.debug(
+             f"Generating description for test {test_name} with stringified summary"
+         )
          return (
              client.chat.completions.create(
-                 model="gpt-4o",
+                 model=model,
+                 temperature=0,
+                 seed=42,
                  messages=[
                      {"role": "system", "content": SYSTEM_PROMPT},
                      {
@@ -164,9 +198,14 @@
              .message.content.strip()
          )

+     logger.debug(
+         f"Generating description for test {test_name} with {len(figures)} figures"
+     )
      return (
          client.chat.completions.create(
-             model="gpt-4o",
+             model=model,
+             temperature=0,
+             seed=42,
              messages=[
                  {"role": "system", "content": SYSTEM_PROMPT},
                  {
@@ -197,18 +236,45 @@
      )


- def generate_description(
-     test_name: str,
+ def background_generate_description(
+     test_id: str,
      test_description: str,
      test_summary: str,
      figures: list = None,
  ):
-     future = __executor.submit(
-         generate_description_async,
-         test_name,
-         test_description,
-         test_summary,
-         figures,
-     )
+     def wrapped():
+         try:
+             return generate_description(
+                 test_id, test_description, test_summary, figures
+             )
+         except Exception as e:
+             logger.error(f"Failed to generate description: {e}")
+
+             return test_description
+
+     return DescriptionFuture(__executor.submit(wrapped))
+
+
+ def is_configured():
+     global __ack
+
+     if __ack:
+         return True
+
+     try:
+         client, model = __get_client_and_model()
+         # send an empty message with max_tokens=1 to "ping" the API
+         response = client.chat.completions.create(
+             model=model,
+             messages=[{"role": "user", "content": ""}],
+             max_tokens=1,
+         )
+         logger.debug(
+             f"Received response from OpenAI: {response.choices[0].message.content}"
+         )
+         __ack = True
+     except Exception as e:
+         logger.debug(f"Failed to connect to OpenAI: {e}")
+         __ack = False

-     return DescriptionFuture(future)
+     return __ack
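
Note: background_generate_description above submits the LLM call to the module's thread pool and returns a DescriptionFuture, falling back to the default description if the call fails. A minimal, self-contained sketch of that lazy-future-with-fallback pattern (slow_llm_call and fallback_text are illustrative stand-ins, not names from the package):

    import concurrent.futures

    executor = concurrent.futures.ThreadPoolExecutor()

    def slow_llm_call():
        raise RuntimeError("simulated API failure")

    fallback_text = "default description"

    def wrapped():
        try:
            return slow_llm_call()
        except Exception:
            return fallback_text  # swallow the error and fall back

    future = executor.submit(wrapped)  # returns immediately; work runs in the background
    print(future.result())  # blocks only when the value is needed -> "default description"
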
validmind/api_client.py CHANGED
@@ -22,19 +22,19 @@ from aiohttp import FormData
  from .client_config import client_config
  from .errors import MissingAPICredentialsError, MissingProjectIdError, raise_api_error
  from .logging import get_logger, init_sentry, send_single_error
- from .utils import NumpyEncoder, md_to_html, run_async
+ from .utils import NumpyEncoder, run_async
  from .vm_models import Figure, MetricResult, ThresholdTestResults

  # TODO: can't import types from vm_models because of circular dependency

  logger = get_logger(__name__)

- _api_key = os.environ.get("VM_API_KEY")
- _api_secret = os.environ.get("VM_API_SECRET")
- _api_host = os.environ.get("VM_API_HOST")
+ _api_key = os.getenv("VM_API_KEY")
+ _api_secret = os.getenv("VM_API_SECRET")
+ _api_host = os.getenv("VM_API_HOST")

- _project = os.environ.get("VM_API_PROJECT")
- _run_cuid = os.environ.get("VM_RUN_CUID")
+ _project = os.getenv("VM_API_PROJECT")
+ _run_cuid = os.getenv("VM_RUN_CUID")

  __api_session: aiohttp.ClientSession = None

@@ -102,21 +102,21 @@ def init(
          api_secret = None
          project = None

-     _project = project or os.environ.get("VM_API_PROJECT")
+     _project = project or os.getenv("VM_API_PROJECT")

      if _project is None:
          raise MissingProjectIdError()

-     _api_key = api_key or os.environ.get("VM_API_KEY")
-     _api_secret = api_secret or os.environ.get("VM_API_SECRET")
+     _api_key = api_key or os.getenv("VM_API_KEY")
+     _api_secret = api_secret or os.getenv("VM_API_SECRET")

      if _api_key is None or _api_secret is None:
          raise MissingAPICredentialsError()

-     _api_host = api_host or os.environ.get(
+     _api_host = api_host or os.getenv(
          "VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking"
      )
-     _run_cuid = os.environ.get("VM_RUN_CUID", None)
+     _run_cuid = os.getenv("VM_RUN_CUID", None)

      try:
          __ping()
@@ -349,7 +349,7 @@ async def log_metadata(
      """
      metadata_dict = {"content_id": content_id}
      if text is not None:
-         metadata_dict["text"] = md_to_html(text, mathml=True)
+         metadata_dict["text"] = text
      if _json is not None:
          metadata_dict["json"] = _json

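Note: the os.environ.get -> os.getenv swaps in this file (and in the files below) are behavior-preserving; os.getenv is a stdlib alias that delegates to os.environ.get. A quick check:

    import os

    # os.getenv(key, default) simply delegates to os.environ.get(key, default),
    # so the swap above changes readability only, not behavior
    assert os.getenv("VM_API_HOST", "fallback") == os.environ.get("VM_API_HOST", "fallback")
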
validmind/logging.py CHANGED
@@ -13,22 +13,45 @@ from sentry_sdk.utils import event_from_exception, exc_info_from_error

  from .__version__ import __version__

- __log_level = None
  __dsn = "https://48f446843657444aa1e2c0d716ef864b@o1241367.ingest.sentry.io/4505239625465856"


  def _get_log_level():
-     """Get the log level from the environment variable if not already set"""
-     if __log_level is not None:
-         return __log_level
+     """Get the log level from the environment variable"""
+     log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()

-     log_level_str = os.environ.get("LOG_LEVEL", "INFO").upper()
      if log_level_str not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
          raise ValueError(f"Invalid log level: {log_level_str}")

      return logging.getLevelName(log_level_str)


+ def get_logger(name="validmind", log_level=None):
+     """Get a logger for the given module name"""
+     formatter = logging.Formatter(
+         fmt="%(asctime)s - %(levelname)s(%(name)s): %(message)s"
+     )
+
+     handler = logging.StreamHandler()
+     handler.setFormatter(formatter)
+
+     logger = logging.getLogger(name)
+     logger.setLevel(log_level or _get_log_level())
+
+     # Clear existing handlers if any (or refine the existing logic as necessary)
+     # TODO: move this to a yaml config and only configure once
+     if not any(
+         isinstance(h, type(handler)) and h.formatter._fmt == formatter._fmt
+         for h in logger.handlers
+     ):
+         logger.addHandler(handler)
+
+     # Prevent logger from propagating to root logger
+     logger.propagate = False
+
+     return logger
+
+
  def init_sentry(server_config):
      """Initialize Sentry SDK for sending logs back to ValidMind

@@ -42,7 +65,10 @@ def init_sentry(server_config):
      - dsn (str): The Sentry DSN
      ...: Other config options for Sentry
      """
-     if server_config.get("send_logs", False) is False:
+     if os.getenv("VM_NO_TELEMETRY", False):
+         return
+
+     if not server_config.get("send_logs", False):
          return

      config = {
@@ -53,33 +79,13 @@
          "environment": "production",
      }
      config.update({k: v for k, v in server_config.items() if k != "send_logs"})
-     sentry_sdk.init(**config)
-
-
- def get_logger(name="validmind", log_level=None):
-     """Get a logger for the given name"""
-     formatter = logging.Formatter(
-         fmt="%(asctime)s - %(levelname)s(%(name)s): %(message)s"
-     )
-
-     handler = logging.StreamHandler()
-     handler.setFormatter(formatter)
-
-     logger = logging.getLogger(name)
-     logger.setLevel(log_level or _get_log_level())
-
-     # Clear existing handlers if any (or refine the existing logic as necessary)
-     # TODO: lets add some better handler management
-     if not any(
-         isinstance(h, type(handler)) and h.formatter._fmt == formatter._fmt
-         for h in logger.handlers
-     ):
-         logger.addHandler(handler)
-
-     # Prevent logger from propagating to root logger
-     logger.propagate = False

-     return logger
+     try:
+         sentry_sdk.init(**config)
+     except Exception as e:
+         logger = get_logger(__name__)
+         logger.info("Sentry failed to initialize - ignoring...")
+         logger.debug(f"Sentry error: {str(e)}")


  def log_performance(func, name=None, logger=None, force=False):
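
Note: the new VM_NO_TELEMETRY opt-out treats any set value as truthy, since os.getenv returns a string whenever the variable is present and every non-empty string is truthy in Python. For example (illustrative, outside the package):

    import os

    os.environ["VM_NO_TELEMETRY"] = "0"  # even "0" or "false" disables telemetry here
    if os.getenv("VM_NO_TELEMETRY", False):
        print("telemetry disabled")  # printed: "0" is a non-empty string
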
validmind/tests/prompt_validation/ai_powered_test.py CHANGED
@@ -20,8 +20,8 @@ class AIPoweredTest:

      def __init__(self, *args, **kwargs):
          if "OPENAI_API_KEY" in os.environ:
-             self.client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-             self.model_name = os.environ.get("VM_OPENAI_MODEL", "gpt-3.5-turbo")
+             self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+             self.model_name = os.getenv("VM_OPENAI_MODEL", "gpt-3.5-turbo")

          elif "AZURE_OPENAI_KEY" in os.environ:
              if "AZURE_OPENAI_ENDPOINT" not in os.environ:
@@ -35,11 +35,11 @@ class AIPoweredTest:
              )

              self.client = AzureOpenAI(
-                 azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
-                 api_key=os.environ.get("AZURE_OPENAI_KEY"),
-                 api_version=os.environ.get("AZURE_OPENAI_VERSION", "2023-05-15"),
+                 azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+                 api_key=os.getenv("AZURE_OPENAI_KEY"),
+                 api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
              )
-             self.model_name = os.environ.get("AZURE_OPENAI_MODEL")
+             self.model_name = os.getenv("AZURE_OPENAI_MODEL")

          else:
              raise ValueError(
validmind/utils.py CHANGED
@@ -26,9 +26,12 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
  from numpy import ndarray
  from tabulate import tabulate

- from .ai import generate_description
+ from .ai import background_generate_description, is_configured
  from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting

+ AI_REVISION_NAME = "Generated by ValidMind AI"
+ DEFAULT_REVISION_NAME = "Default Description"
+
  DEFAULT_BIG_NUMBER_DECIMALS = 2
  DEFAULT_SMALL_NUMBER_DECIMALS = 4

@@ -471,9 +474,11 @@ def get_description_metadata(
      Generates an LLM interpretation of the test results or uses the default
      description and returns a metadata object that can be logged with the test results.

-     To enable LLM-generated descriptions, set the VALIDMIND_LLM_DESCRIPTIONS_ENABLED
-     environment variable to "true". The default description will be used if LLM
-     descriptions are disabled.
+     By default, the description is generated by an LLM that will interpret the test
+     results and provide a human-readable description. If the summary or figures are
+     not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
+     set to `0` or `false` or no LLM has been configured, the default description will
+     be used as the test result description.

      Note: Either the summary or figures must be provided to generate the description.

@@ -487,17 +492,26 @@ def get_description_metadata(
      Returns:
          dict: The metadata object to be logged with the test results
      """
-     if os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower() == "true":
-         revision_name = "Generated by ValidMind AI"
-         description = generate_description(
-             test_name=test_id,
+     env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
+         "0",
+         "false",
+     ]
+
+     if (summary or figures) and not env_disabled and is_configured():
+         revision_name = AI_REVISION_NAME
+
+         # get a description future and set it as the description in the metadata
+         # it will be lazily retrieved so it can run in the background in parallel
+         description = background_generate_description(
+             test_id=test_id,
              test_description=default_description,
              test_summary=summary,
              figures=figures,
          )
+
      else:
-         revision_name = "Default Description"
-         description = default_description
+         revision_name = DEFAULT_REVISION_NAME
+         description = md_to_html(default_description, mathml=True)

      return {
          "content_id": f"{prefix}:{test_id}::{revision_name}",
validmind/vm_models/test/result_wrapper.py CHANGED
@@ -7,7 +7,6 @@ Result Wrappers for test and metric results
  """
  import asyncio
  import json
- import os
  from abc import ABC, abstractmethod
  from dataclasses import dataclass
  from typing import Dict, List, Optional, Union
@@ -19,7 +18,7 @@ from ... import api_client
  from ...ai import DescriptionFuture
  from ...input_registry import input_registry
  from ...logging import get_logger
- from ...utils import NumpyEncoder, display, md_to_html, run_async, test_id_to_name
+ from ...utils import AI_REVISION_NAME, NumpyEncoder, display, run_async, test_id_to_name
  from ..dataset import VMDataset
  from ..figure import Figure
  from .metric_result import MetricResult
@@ -31,31 +30,35 @@ logger = get_logger(__name__)


  async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
-     """
-     Update the metadata of a content item. By default we don't
-     override the existing metadata, but we can override it by
-     setting the VM_OVERRIDE_METADATA environment variable to True
-     """
-     should_update = False
-
-     # check if the env variable is set to force overwriting metadata
-     if os.environ.get("VM_OVERRIDE_METADATA", "false").lower() == "true":
-         should_update = True
+     """Create or Update a Metadata Object"""
+     parts = content_id.split("::")
+     content_id = parts[0]
+     revision_name = parts[1] if len(parts) > 1 else None

-     # if not set, check if the content_id is a composite metric def
-     if not should_update and content_id.startswith("composite_metric_def:"):
-         # we always want composite metric definitions to be updated
-         should_update = True
+     # we always want composite metric definitions to be updated
+     should_update = content_id.startswith("composite_metric_def:")

-     # if not set, lets check if the metadata already exists
-     if not should_update:
+     # if we are updating a metric or test description, we check if the text
+     # has changed from the last time it was logged, and only update if it has
+     if content_id.split(":", 1)[0] in ["metric_description", "test_description"]:
          try:
-             await api_client.get_metadata(content_id)
-         except Exception:  # TODO: this shouldn't be a catch-all
-             # if the metadata doesn't exist, we should create (update) it
+             md = await api_client.get_metadata(content_id)
+             # if there is an existing description, only update it if the new one
+             # is different and is an AI-generated description
+             should_update = (
+                 md["text"] != text if revision_name == AI_REVISION_NAME else False
+             )
+             logger.debug(f"Check if description has changed: {should_update}")
+         except Exception:
+             # if exception, assume its not created yet TODO: don't catch all
              should_update = True

      if should_update:
+         if revision_name:
+             content_id = f"{content_id}::{revision_name}"
+
+         logger.debug(f"Updating metadata for `{content_id}`")
+
          await api_client.log_metadata(content_id, text, _json)


@@ -102,12 +105,6 @@ class ResultWrapper(ABC):

          return self.to_widget()

-     def _markdown_description_to_html(self, description: str):
-         """
-         Convert a markdown string to html
-         """
-         return md_to_html(description)
-
      def _summary_tables_to_widget(self, summary: ResultSummary):
          """
          Create an ipywdiget representation of the summary tables
@@ -277,9 +274,7 @@ class MetricResultWrapper(ResultWrapper):
                  metric_description = metric_description.get_description()
                  self.result_metadata[0]["text"] = metric_description

-             vbox_children.append(
-                 HTML(value=self._markdown_description_to_html(metric_description))
-             )
+             vbox_children.append(HTML(value=metric_description))

          if self.metric:
              if self.output_template:
@@ -464,9 +459,7 @@ class ThresholdTestResultWrapper(ResultWrapper):
                  metric_description = metric_description.get_description()
                  self.result_metadata[0]["text"] = metric_description

-             description_html.append(
-                 self._markdown_description_to_html(metric_description)
-             )
+             description_html.append(metric_description)

          description_html.append(
              f"""
validmind/vm_models/test_suite/runner.py CHANGED
@@ -145,14 +145,17 @@ class TestSuiteRunner:

          await asyncio.sleep(0.5)

-     def summarize(self):
+     def summarize(self, show_link: bool = True):
          if not is_notebook():
              return logger.info("Test suite done...")

+         self.pbar_description.value = "Collecting test results..."
+
          summary = TestSuiteSummary(
              title=self.suite.title,
              description=self.suite.description,
              sections=self.suite.sections,
+             show_link=show_link,
          )
          summary.display()

@@ -181,6 +184,6 @@ class TestSuiteRunner:
          run_async(self.log_results)
          run_async_check(self._check_progress)

-         self.summarize()
+         self.summarize(show_link=send)

          self._stop_progress_bar()
validmind/vm_models/test_suite/summary.py CHANGED
@@ -35,8 +35,14 @@ class TestSuiteSectionSummary:
          self._build_summary()

      def _add_description(self):
-         description = f'<div class="result">{md_to_html(self.description)}</div>'
-         self._widgets.append(widgets.HTML(value=description))
+         if not self.description:
+             return
+
+         self._widgets.append(
+             widgets.HTML(
+                 value=f'<div class="result">{md_to_html(self.description)}</div>'
+             )
+         )

      def _add_tests_summary(self):
          children = []
@@ -45,9 +51,9 @@ class TestSuiteSectionSummary:
          for test in self.tests:
              children.append(test.result.to_widget())
              titles.append(
-                 f"❌ {test.result.name}: {test.title} ({test.test_id})"
+                 f"❌ {test.result.name}: {test.name} ({test.test_id})"
                  if isinstance(test.result, FailedResultWrapper)
-                 else f"{test.result.name}: {test.title} ({test.test_id})"
+                 else f"{test.result.name}: {test.name} ({test.test_id})"
              )

          self._widgets.append(widgets.Accordion(children=children, titles=titles))
@@ -71,6 +77,7 @@ class TestSuiteSummary:
      title: str
      description: str
      sections: List[TestSuiteSection]
+     show_link: bool = True

      _widgets: List[widgets.Widget] = None

@@ -100,8 +107,11 @@ class TestSuiteSummary:
          self._widgets.append(widgets.HTML(value=results_link))

      def _add_description(self):
-         description = f'<div class="result">{md_to_html(self.description)}</div>'
-         self._widgets.append(widgets.HTML(value=description))
+         self._widgets.append(
+             widgets.HTML(
+                 value=f'<div class="result">{md_to_html(self.description)}</div>'
+             )
+         )

      def _add_sections_summary(self):
          children = []
@@ -145,7 +155,8 @@ class TestSuiteSummary:
          self._widgets = []

          self._add_title()
-         self._add_results_link()
+         if self.show_link:
+             self._add_results_link()
          self._add_description()
          if len(self.sections) == 1:
              self._add_top_level_section_summary()
validmind/vm_models/test_suite/test.py CHANGED
@@ -21,6 +21,7 @@ class TestSuiteTest:

      test_id: str
      output_template: str = None
+     name: str = None

      _test_class: Test = None
      _test_instance: Test = None
@@ -39,6 +40,8 @@ class TestSuiteTest:
          self.test_id = test_id_or_obj["id"]
          self.output_template = test_id_or_obj.get("output_template")

+         self.name = test_id_to_name(self.test_id)
+
          try:
              self._test_class = load_test_class(self.test_id)
          except LoadTestError as e:
@@ -52,14 +55,6 @@ class TestSuiteTest:
              # since _test_class is None
              logger.error(f"Failed to load test '{self.test_id}': {e}")

-     @property
-     def title(self):
-         return test_id_to_name(self.test_id)
-
-     @property
-     def name(self):
-         return self._test_class.name
-
      @property
      def test_type(self):
          return self._test_class.test_type
@@ -86,12 +81,12 @@ class TestSuiteTest:
              )
          except Exception as e:
              logger.error(
-                 f"Failed to load test '{self._test_class.name}': "
+                 f"Failed to load test '{self.test_id}': "
                  f"({e.__class__.__name__}) {e}"
              )
              self.result = FailedResultWrapper(
                  error=e,
-                 message=f"Failed to load test '{self.test_id}'",
+                 message=f"Failed to load test '{self.name}'",
                  result_id=self.test_id,
              )

@@ -107,7 +102,7 @@ class TestSuiteTest:
          # run the test and log the performance if LOG_LEVEL is set to DEBUG
          log_performance(
              func=self._test_instance.run,
-             name=self._test_instance.name,
+             name=self.test_id,
              logger=logger,
          )()  # this is a decorator so we need to call it

@@ -116,14 +111,13 @@ class TestSuiteTest:
              raise e  # Re-raise the exception if we are in fail fast mode

          logger.error(
-             f"Failed to run test '{self._test_instance.name}': "
-             f"({e.__class__.__name__}) {e}"
+             f"Failed to run test '{self.test_id}': " f"({e.__class__.__name__}) {e}"
          )
          self.result = FailedResultWrapper(
              name=f"Failed {self._test_instance.test_type}",
              error=e,
-             message=f"Failed to run '{self._test_instance.name}'",
-             result_id=self._test_instance.name,
+             message=f"Failed to run '{self.name}'",
+             result_id=self.test_id,
          )

          return
@@ -132,8 +126,8 @@ class TestSuiteTest:
          self.result = FailedResultWrapper(
              name=f"Failed {self._test_instance.test_type}",
              error=None,
-             message=f"'{self._test_instance.name}' did not return a result",
-             result_id=self._test_instance.name,
+             message=f"'{self.name}' did not return a result",
+             result_id=self.test_id,
          )

          return
@@ -142,9 +136,8 @@ class TestSuiteTest:
          self.result = FailedResultWrapper(
              name=f"Failed {self._test_instance.test_type}",
              error=None,
-             message=f"'{self._test_instance.name}' returned an invalid result: "
-             f"{self._test_instance.result}",
-             result_id=self._test_instance.name,
+             message=f"{self.name} returned an invalid result: {self._test_instance.result}",
+             result_id=self.test_id,
          )

          return
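
Note: name is now computed once at initialization from the test ID via test_id_to_name, rather than reading _test_class.name, which depended on the test class having loaded successfully. A hypothetical re-implementation of what such a helper does (the real one lives in validmind/utils.py and may differ in detail):

    import re

    def test_id_to_name(test_id: str) -> str:
        # take the last segment of the dotted ID and split CamelCase into words
        last = test_id.split(".")[-1]
        return " ".join(re.findall(r"[A-Z]+[a-z0-9]*|[a-z0-9]+", last))

    print(test_id_to_name("validmind.data_validation.ClassImbalance"))  # Class Imbalance
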
validmind-2.2.5.dist-info/METADATA → validmind-2.2.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: validmind
- Version: 2.2.5
+ Version: 2.2.6
  Summary: ValidMind Developer Framework
  License: Commercial License
  Author: Andres Rodriguez
validmind-2.2.5.dist-info/RECORD → validmind-2.2.6.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
  validmind/__init__.py,sha256=XqPjCbFMvEYl0cIT42EZKP7DFMYDC7KDW6syo8MGkDg,3682
- validmind/__version__.py,sha256=r1Tn-QXWA9VMrkPdk9c6Clll9uei6qKO7PemQL_uDYI,22
- validmind/ai.py,sha256=7iJtKD7G27HLabNKUmVSD_tCbJH62BEqbrlv2IB8sHI,6881
- validmind/api_client.py,sha256=A8RLYFdRGdffXkd1qTa0o2_yy6e491N1o17KHHXmb8I,16035
+ validmind/__version__.py,sha256=qzqxcwWCwWgKw_eJA2nZPycPzwfpaSjAKO3MwNvDqgw,22
+ validmind/ai.py,sha256=Uc09ulMZhu0VgbdZtHlRuzRg1QeCHVXJMXmZd6dbyEQ,9071
+ validmind/api_client.py,sha256=kIEO515kp_l5LA_QyRgHOumYaOIMSrCnl9Nj4Rm5TK8,15948
  validmind/client.py,sha256=S_FozHlMJBgF8IQJES27LeFoYcoCcGZ6dkxE8adyIRQ,18607
  validmind/client_config.py,sha256=58L6s6-9vFWC9vkSs_98CjV1YWmlksdhblJtPQxQsAk,1611
  validmind/datasets/__init__.py,sha256=oYfcvW7BAyUgpghBOnTeGbQF6tpFAWg38rRirdLr8m8,262
@@ -59,7 +59,7 @@ validmind/errors.py,sha256=qy7Gp6Uom5J6WmLw-CpE5zaTN96SiN7kJjDGBaJdoxY,8023
  validmind/html_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  validmind/html_templates/content_blocks.py,sha256=AHQ5MlhR1JYldel7soo5ztpTJJ5-kYtyKPBmh-vwxuI,3997
  validmind/input_registry.py,sha256=zexO3x-vncaoWvQ6VfkvgDLn6x72e2BNel_jCbrVHSE,793
- validmind/logging.py,sha256=Ui67RYoB1qbuHm_KX1aGj_8DoK_ljjUDGG6a1XJ4yoY,5041
+ validmind/logging.py,sha256=J1Y1dYCH1dtkoYCHoXMOQH_B7EO4fJytWRDrDqZZz8U,5204
  validmind/models/__init__.py,sha256=lraTbNwoKckXNP3Dbyj-euI78UTkZ_w5wpUOb8l5nWs,729
  validmind/models/foundation.py,sha256=LSUdpnBYlPiOUVrTyofStPdoR6y0_nqJoM9TiYT1MRo,1758
  validmind/models/function.py,sha256=loZoheqGyTvHze1XROEX1aqXgM08kPMr67X1nutaaeU,1629
@@ -259,7 +259,7 @@ validmind/tests/prompt_validation/NegativeInstruction.py,sha256=1aqNV_vB5oM2_8UX
  validmind/tests/prompt_validation/Robustness.py,sha256=VIQotugWQ32Q1kr1kacBuqk-q1EPTRi9NZAIYrTDsY0,6826
  validmind/tests/prompt_validation/Specificity.py,sha256=v823rZAr9a810Q_RlgH7FqPPxXZ00hDJApkFaJJ8mgk,6116
  validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- validmind/tests/prompt_validation/ai_powered_test.py,sha256=pogmrOR2fTY34Tx5TXIs5Smjz09mdh5Kp4NifrmPrFY,2975
+ validmind/tests/prompt_validation/ai_powered_test.py,sha256=qE3OXU0Db3z7KNqHMWZE8e78BeGg6pB0IozSzDNXIdc,2945
  validmind/tests/test_providers.py,sha256=1tYn_sWNqifFpOp8eNvcVyJzxBjhHV5Py4FxO8opPZA,4944
  validmind/unit_metrics/__init__.py,sha256=a7oV8YRC-O6dF7ePz4E8Fqrh4ax6AWT26Y996VPView,7084
  validmind/unit_metrics/classification/sklearn/Accuracy.py,sha256=2Ra_OpKceY01h1dAFCqRFAwe--K2oVbCUiYjM5AH_nQ,480
@@ -279,7 +279,7 @@ validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py,sha256=LCNgpDw6FB
  validmind/unit_metrics/regression/sklearn/MeanSquaredError.py,sha256=7UQnDTTO7yRRyMe3Zac9ZyjEbbD8pW_8WnZwHdVB_8U,463
  validmind/unit_metrics/regression/sklearn/RSquaredScore.py,sha256=h9U5ndtnJfNNtKPZIo5n3KRp-m4akQcEo0t1iSwjVzY,420
  validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py,sha256=_5IQIU9jNfmTE4NLJvaRWXbudRGV2PS7nYF5e4fkSMY,556
- validmind/utils.py,sha256=ckmtwpKvlUAbW4atFWOb8NWqhiz0LurXoqMtI2qq26Y,16342
+ validmind/utils.py,sha256=ZQ016Cbgc_hrQb2HZ7s9KH80fDncnQZXFwa9oi8JO8g,16931
  validmind/vm_models/__init__.py,sha256=lmWCD2u4tW6_AH39UnJ24sCcMUcsHbUttz7SaZfrh3s,1168
  validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
  validmind/vm_models/dataset/dataset.py,sha256=VlR5Wp5pCoXY3U0C8AbevaySFGf0KJ3QIK3go5OEbog,21843
@@ -290,17 +290,17 @@ validmind/vm_models/test/metric.py,sha256=R7Y-_fzBcIrkJw7-BeifQHMuHTV3HLDc8T3nS_
  validmind/vm_models/test/metric_result.py,sha256=Bak4GDrMlNq5NtgP5exwlPsKZgz3tWgtC6jZqtHjvqM,1987
  validmind/vm_models/test/output_template.py,sha256=njqCAMyLxwadkCWhACVskyL9-psTgmUysaeeirTVAX4,1500
  validmind/vm_models/test/result_summary.py,sha256=QJcIKJUeBf5wW3lyue6ctsi1jKSyoiAIfmjudGJiJtc,2028
- validmind/vm_models/test/result_wrapper.py,sha256=e0hN_oE31g64PU39zYes-PBgqd05TRXRUKF87VnjMUk,17654
+ validmind/vm_models/test/result_wrapper.py,sha256=an310hWJpVvWDrVSFvjTDUBDSE4XJ0aDliSVnKsgZaQ,17611
  validmind/vm_models/test/test.py,sha256=434PqhPcbwfCmNjYVwHGMG-rViIatb9-1nmxkdZF8Xo,3104
  validmind/vm_models/test/threshold_test.py,sha256=7d46Z5N_U1hTr6LGa2A0_ZuaIFl54xZ_eRzgf-KUGjk,3662
  validmind/vm_models/test/threshold_test_result.py,sha256=EXP-g_e3NsnpkvNgYew030qVUoY6ZTHyuuFUXaq-BuM,1954
  validmind/vm_models/test_context.py,sha256=AN7-atBgOcD04MLVitCFJYooxF6_iNmvI2H4nkv32iw,9035
- validmind/vm_models/test_suite/runner.py,sha256=U93TauwLNEbAgJIzBZ9k9ip9NnlTt0gACHVgfO7J9BI,6754
- validmind/vm_models/test_suite/summary.py,sha256=GpqabqN_RcI5vbv4-A9YCLTpUOTKockp6oL1hi8IwVs,4541
- validmind/vm_models/test_suite/test.py,sha256=cIa-6_YkFp7Io4wBkr09aFNmljmUFSagV4JreLd1Q6Y,5285
+ validmind/vm_models/test_suite/runner.py,sha256=uDt1eo3sHUXXV-ZN_gJUKR-0Hp5RNtUcDgKHQXtLf7s,6893
+ validmind/vm_models/test_suite/summary.py,sha256=co-xJJMUYGb7cOiVmw0i8vpZlfiMqrWjaCOmHKMAbcE,4686
+ validmind/vm_models/test_suite/test.py,sha256=_GfbK36l98SjzgVcucmp0OKBJKqMW3neO7SqJ3EWeps,5049
  validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
- validmind-2.2.5.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
- validmind-2.2.5.dist-info/METADATA,sha256=a8SDCtFs4QR76UFHhVR2olxYIvPODKUTiInrAW9ZDCI,3911
- validmind-2.2.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- validmind-2.2.5.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
- validmind-2.2.5.dist-info/RECORD,,
+ validmind-2.2.6.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
+ validmind-2.2.6.dist-info/METADATA,sha256=x00vy4OCVq0TNkM2jjt2Jzl-1FwH-I96zzIuoHfXRHU,3911
+ validmind-2.2.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ validmind-2.2.6.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
+ validmind-2.2.6.dist-info/RECORD,,