validmind 2.2.4__py3-none-any.whl → 2.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai.py +98 -34
- validmind/api_client.py +12 -12
- validmind/logging.py +38 -32
- validmind/tests/prompt_validation/ai_powered_test.py +6 -6
- validmind/utils.py +33 -12
- validmind/vm_models/test/result_wrapper.py +26 -33
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +5 -2
- validmind/vm_models/test_suite/summary.py +18 -7
- validmind/vm_models/test_suite/test.py +13 -20
- {validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/METADATA +1 -1
- {validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/RECORD +16 -16
- {validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/LICENSE +0 -0
- {validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/WHEEL +0 -0
- {validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2.2.4"
+__version__ = "2.2.6"
validmind/ai.py
CHANGED
@@ -7,6 +7,11 @@ import os
 
 from openai import AzureOpenAI, OpenAI
 
+from .logging import get_logger
+
+logger = get_logger(__name__)
+
+
 SYSTEM_PROMPT = """
 You are an expert data scientist and MRM specialist.
 You are tasked with analyzing the results of a quantitative test run on some model or dataset.
@@ -19,6 +24,7 @@ This will act as the description and interpretation of the result in the model d
 It will be displayed alongside the test results table and figures.
 
 Avoid long sentences and complex vocabulary.
+Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
 Structure the response clearly and logically.
 Use valid Markdown syntax to format the response.
 Respond only with your analysis and insights, not the verbatim test results.
@@ -28,10 +34,12 @@ Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" ->
 Explain the test, its purpose, its mechanism/formula etc and why it is useful.
 If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
 Highlight the key insights from the test results. The key insights should be concise and easily understood.
+An insight should only be included if it is something not entirely obvious from the test results.
 End the response with any closing remarks, summary or additional useful information.
 
-Use the following format for the response (feel free to
-
+Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
+
+<ResponseFormat>
 **<Test Name>** calculates the xyz <continue to explain what it does in detail>...
 
 This test is useful for <explain why and for what this test is useful>...
@@ -42,8 +50,7 @@ The following key insights can be identified in the test results:
 
 - **<key insight 1 - title>**: <concise explanation of key insight 1>
 - ...<continue with any other key insights using the same format>
-
-It is very important that the text is nicely formatted and contains enough information to be useful to the user as documentation.
+</ResponseFormat>
 """.strip()
 
 
@@ -73,12 +80,17 @@ The attached plots show the results of the test.
 __client = None
 __model = None
 
+# can be None, True or False (ternary to represent initial state, ack and failed ack)
+__ack = None
+
 __executor = concurrent.futures.ThreadPoolExecutor()
 
 
 def __get_client_and_model():
-    """
-
+    """Get model and client to use for generating interpretations
+
+    On first call, it will look in the environment for the API key endpoint, model etc.
+    and store them in a global variable to avoid loading them up again.
     """
     global __client, __model
 
@@ -86,8 +98,10 @@ def __get_client_and_model():
         return __client, __model
 
     if "OPENAI_API_KEY" in os.environ:
-        __client = OpenAI(api_key=os.
-        __model = os.
+        __client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        __model = os.getenv("VM_OPENAI_MODEL", "gpt-4o")
+
+        logger.debug(f"Using OpenAI {__model} for generating descriptions")
 
     elif "AZURE_OPENAI_KEY" in os.environ:
         if "AZURE_OPENAI_ENDPOINT" not in os.environ:
@@ -101,11 +115,13 @@ def __get_client_and_model():
             )
 
         __client = AzureOpenAI(
-            azure_endpoint=os.
-            api_key=os.
-            api_version=os.
+            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+            api_key=os.getenv("AZURE_OPENAI_KEY"),
+            api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
         )
-        __model = os.
+        __model = os.getenv("AZURE_OPENAI_MODEL")
+
+        logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")
 
     else:
         raise ValueError("OPENAI_API_KEY or AZURE_OPENAI_KEY must be set")
@@ -126,12 +142,19 @@ class DescriptionFuture:
         self._future = future
 
     def get_description(self):
-
-
+        from .utils import md_to_html
+
+        if isinstance(self._future, str):
+            description = self._future
+        else:
+            # This will block until the future is completed
+            description = self._future.result()
 
+        return md_to_html(description, mathml=True)
 
-
-
+
+def generate_description(
+    test_id: str,
     test_description: str,
     test_summary: str,
     figures: list = None,
@@ -140,14 +163,25 @@ def generate_description_async(
     if not test_summary and not figures:
         raise ValueError("No summary or figures provided - cannot generate description")
 
-    client,
+    client, model = __get_client_and_model()
     # get last part of test id
-    test_name =
+    test_name = test_id.split(".")[-1]
+    # truncate the test description to save time
+    test_description = (
+        f"{test_description[:500]}..."
+        if len(test_description) > 500
+        else test_description
+    )
 
     if test_summary:
+        logger.debug(
+            f"Generating description for test {test_name} with stringified summary"
+        )
        return (
             client.chat.completions.create(
-                model=
+                model=model,
+                temperature=0,
+                seed=42,
                 messages=[
                     {"role": "system", "content": SYSTEM_PROMPT},
                     {
@@ -161,13 +195,17 @@ def generate_description_async(
                 ],
             )
             .choices[0]
-            .message.content.strip(
-            .strip()
+            .message.content.strip()
         )
 
+    logger.debug(
+        f"Generating description for test {test_name} with {len(figures)} figures"
+    )
     return (
         client.chat.completions.create(
-            model=
+            model=model,
+            temperature=0,
+            seed=42,
             messages=[
                 {"role": "system", "content": SYSTEM_PROMPT},
                 {
@@ -194,23 +232,49 @@ def generate_description_async(
             ],
         )
         .choices[0]
-        .message.content.strip(
-        .strip()
+        .message.content.strip()
     )
 
 
-def
-
+def background_generate_description(
+    test_id: str,
     test_description: str,
     test_summary: str,
     figures: list = None,
 ):
-
-
-
-
-
-
-
+    def wrapped():
+        try:
+            return generate_description(
+                test_id, test_description, test_summary, figures
+            )
+        except Exception as e:
+            logger.error(f"Failed to generate description: {e}")
+
+            return test_description
+
+    return DescriptionFuture(__executor.submit(wrapped))
+
+
+def is_configured():
+    global __ack
+
+    if __ack:
+        return True
+
+    try:
+        client, model = __get_client_and_model()
+        # send an empty message with max_tokens=1 to "ping" the API
+        response = client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": ""}],
+            max_tokens=1,
+        )
+        logger.debug(
+            f"Received response from OpenAI: {response.choices[0].message.content}"
+        )
+        __ack = True
+    except Exception as e:
+        logger.debug(f"Failed to connect to OpenAI: {e}")
+        __ack = False
 
-    return
+    return __ack
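The new `background_generate_description` and `DescriptionFuture` pair added above follow a submit-now, read-later pattern: the LLM call is handed to the module-level `ThreadPoolExecutor`, and the result only blocks when the description is actually rendered. A minimal standalone sketch of that pattern (the names below are illustrative, not the package's API):

```python
import concurrent.futures
import time

_executor = concurrent.futures.ThreadPoolExecutor()


class LazyResult:
    """Holds either a plain string or a Future and resolves it on demand."""

    def __init__(self, value_or_future):
        self._value = value_or_future

    def get(self):
        if isinstance(self._value, str):
            return self._value
        # blocks only at read time, not at submission time
        return self._value.result()


def run_in_background(func, *args):
    return LazyResult(_executor.submit(func, *args))


def slow_describe(name):
    time.sleep(0.1)  # stand-in for an LLM call
    return f"Description for {name}"


lazy = run_in_background(slow_describe, "ClassImbalance")
print(lazy.get())  # blocks here until the worker thread finishes
```

Note that the wrapped call in the diff also swallows exceptions and falls back to the default test description, so a misconfigured LLM degrades gracefully instead of failing the test run.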
validmind/api_client.py
CHANGED
@@ -22,19 +22,19 @@ from aiohttp import FormData
 from .client_config import client_config
 from .errors import MissingAPICredentialsError, MissingProjectIdError, raise_api_error
 from .logging import get_logger, init_sentry, send_single_error
-from .utils import NumpyEncoder,
+from .utils import NumpyEncoder, run_async
 from .vm_models import Figure, MetricResult, ThresholdTestResults
 
 # TODO: can't import types from vm_models because of circular dependency
 
 logger = get_logger(__name__)
 
-_api_key = os.
-_api_secret = os.
-_api_host = os.
+_api_key = os.getenv("VM_API_KEY")
+_api_secret = os.getenv("VM_API_SECRET")
+_api_host = os.getenv("VM_API_HOST")
 
-_project = os.
-_run_cuid = os.
+_project = os.getenv("VM_API_PROJECT")
+_run_cuid = os.getenv("VM_RUN_CUID")
 
 __api_session: aiohttp.ClientSession = None
 
@@ -102,21 +102,21 @@ def init(
     api_secret = None
     project = None
 
-    _project = project or os.
+    _project = project or os.getenv("VM_API_PROJECT")
 
     if _project is None:
         raise MissingProjectIdError()
 
-    _api_key = api_key or os.
-    _api_secret = api_secret or os.
+    _api_key = api_key or os.getenv("VM_API_KEY")
+    _api_secret = api_secret or os.getenv("VM_API_SECRET")
 
     if _api_key is None or _api_secret is None:
         raise MissingAPICredentialsError()
 
-    _api_host = api_host or os.
+    _api_host = api_host or os.getenv(
         "VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking"
     )
-    _run_cuid = os.
+    _run_cuid = os.getenv("VM_RUN_CUID", None)
 
     try:
         __ping()
@@ -349,7 +349,7 @@ async def log_metadata(
     """
     metadata_dict = {"content_id": content_id}
     if text is not None:
-        metadata_dict["text"] =
+        metadata_dict["text"] = text
     if _json is not None:
         metadata_dict["json"] = _json
 
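Most of the api_client.py changes swap the configuration lookups over to `os.getenv`, which returns `None` (or a supplied default) when a variable is unset instead of raising. A quick illustration of the behavior the new module-level defaults rely on:

```python
import os

# unset variables come back as None rather than raising KeyError
assert os.getenv("SOME_UNSET_VM_VARIABLE") is None

# a default can be supplied, as in the VM_API_HOST lookup inside init()
host = os.getenv("VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking")
print(host)
```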
validmind/logging.py
CHANGED
@@ -13,22 +13,45 @@ from sentry_sdk.utils import event_from_exception, exc_info_from_error
 
 from .__version__ import __version__
 
-__log_level = None
 __dsn = "https://48f446843657444aa1e2c0d716ef864b@o1241367.ingest.sentry.io/4505239625465856"
 
 
 def _get_log_level():
-    """Get the log level from the environment variable
-
-    return __log_level
+    """Get the log level from the environment variable"""
+    log_level_str = os.getenv("LOG_LEVEL", "INFO").upper()
 
-    log_level_str = os.environ.get("LOG_LEVEL", "INFO").upper()
     if log_level_str not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
         raise ValueError(f"Invalid log level: {log_level_str}")
 
     return logging.getLevelName(log_level_str)
 
 
+def get_logger(name="validmind", log_level=None):
+    """Get a logger for the given module name"""
+    formatter = logging.Formatter(
+        fmt="%(asctime)s - %(levelname)s(%(name)s): %(message)s"
+    )
+
+    handler = logging.StreamHandler()
+    handler.setFormatter(formatter)
+
+    logger = logging.getLogger(name)
+    logger.setLevel(log_level or _get_log_level())
+
+    # Clear existing handlers if any (or refine the existing logic as necessary)
+    # TODO: move this to a yaml config and only configure once
+    if not any(
+        isinstance(h, type(handler)) and h.formatter._fmt == formatter._fmt
+        for h in logger.handlers
+    ):
+        logger.addHandler(handler)
+
+    # Prevent logger from propagating to root logger
+    logger.propagate = False
+
+    return logger
+
+
 def init_sentry(server_config):
     """Initialize Sentry SDK for sending logs back to ValidMind
 
@@ -42,7 +65,10 @@ def init_sentry(server_config):
     - dsn (str): The Sentry DSN
     ...: Other config options for Sentry
     """
-    if
+    if os.getenv("VM_NO_TELEMETRY", False):
+        return
+
+    if not server_config.get("send_logs", False):
         return
 
     config = {
@@ -53,33 +79,13 @@ def init_sentry(server_config):
         "environment": "production",
     }
     config.update({k: v for k, v in server_config.items() if k != "send_logs"})
-    sentry_sdk.init(**config)
-
-
-def get_logger(name="validmind", log_level=None):
-    """Get a logger for the given name"""
-    formatter = logging.Formatter(
-        fmt="%(asctime)s - %(levelname)s(%(name)s): %(message)s"
-    )
-
-    handler = logging.StreamHandler()
-    handler.setFormatter(formatter)
-
-    logger = logging.getLogger(name)
-    logger.setLevel(log_level or _get_log_level())
-
-    # Clear existing handlers if any (or refine the existing logic as necessary)
-    # TODO: lets add some better handler management
-    if not any(
-        isinstance(h, type(handler)) and h.formatter._fmt == formatter._fmt
-        for h in logger.handlers
-    ):
-        logger.addHandler(handler)
-
-    # Prevent logger from propagating to root logger
-    logger.propagate = False
 
-
+    try:
+        sentry_sdk.init(**config)
+    except Exception as e:
+        logger = get_logger(__name__)
+        logger.info("Sentry failed to initialize - ignoring...")
+        logger.debug(f"Sentry error: {str(e)}")
 
 
 def log_performance(func, name=None, logger=None, force=False):
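logging.py now defines `get_logger` ahead of `init_sentry` (so a failed Sentry init can itself be logged), adds a `VM_NO_TELEMETRY` opt-out, and keeps reading the level from `LOG_LEVEL`. A small usage sketch, assuming the validmind package is importable:

```python
import os

os.environ["LOG_LEVEL"] = "DEBUG"  # read by _get_log_level() when the logger is built

from validmind.logging import get_logger

logger = get_logger(__name__)
logger.debug("the handler is attached once per formatter, so repeated calls stay clean")

# logging.getLogger() caches loggers by name, and the duplicate-handler check
# prevents a second StreamHandler from being added on this repeat call
assert get_logger(__name__) is logger
```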
validmind/tests/prompt_validation/ai_powered_test.py
CHANGED
@@ -20,8 +20,8 @@ class AIPoweredTest:
 
     def __init__(self, *args, **kwargs):
         if "OPENAI_API_KEY" in os.environ:
-            self.client = OpenAI(api_key=os.
-            self.model_name = os.
+            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+            self.model_name = os.getenv("VM_OPENAI_MODEL", "gpt-3.5-turbo")
 
         elif "AZURE_OPENAI_KEY" in os.environ:
             if "AZURE_OPENAI_ENDPOINT" not in os.environ:
@@ -35,11 +35,11 @@ class AIPoweredTest:
                 )
 
             self.client = AzureOpenAI(
-                azure_endpoint=os.
-                api_key=os.
-                api_version=os.
+                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+                api_key=os.getenv("AZURE_OPENAI_KEY"),
+                api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
             )
-            self.model_name = os.
+            self.model_name = os.getenv("AZURE_OPENAI_MODEL")
 
         else:
             raise ValueError(
validmind/utils.py
CHANGED
@@ -26,9 +26,12 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
 from numpy import ndarray
 from tabulate import tabulate
 
-from .ai import
+from .ai import background_generate_description, is_configured
 from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
 
+AI_REVISION_NAME = "Generated by ValidMind AI"
+DEFAULT_REVISION_NAME = "Default Description"
+
 DEFAULT_BIG_NUMBER_DECIMALS = 2
 DEFAULT_SMALL_NUMBER_DECIMALS = 4
 
@@ -459,15 +462,23 @@ def md_to_html(md: str, mathml=False) -> str:
     return html
 
 
-def get_description_metadata(
+def get_description_metadata(
+    test_id,
+    default_description,
+    summary=None,
+    figures=None,
+    prefix="metric_description",
+):
     """Get Metadata Dictionary for a Test or Metric Result
 
     Generates an LLM interpretation of the test results or uses the default
     description and returns a metadata object that can be logged with the test results.
 
-
-
-
+    By default, the description is generated by an LLM that will interpret the test
+    results and provide a human-readable description. If the summary or figures are
+    not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
+    set to `0` or `false` or no LLM has been configured, the default description will
+    be used as the test result description.
 
     Note: Either the summary or figures must be provided to generate the description.
 
@@ -476,23 +487,33 @@ def get_description_metadata(test_id, default_description, summary=None, figures
         default_description (str): The default description for the test
         summary (Any): The test summary or results to interpret
         figures (List[Figure]): The figures to attach to the test suite result
+        prefix (str): The prefix to use for the content ID (Default: "metric_description")
 
     Returns:
         dict: The metadata object to be logged with the test results
     """
-
-
-
-
+    env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
+        "0",
+        "false",
+    ]
+
+    if (summary or figures) and not env_disabled and is_configured():
+        revision_name = AI_REVISION_NAME
+
+        # get description future and set it as the description in the metadata
+        # this will lazily retrieved so it can run in the background in parallel
+        description = background_generate_description(
+            test_id=test_id,
             test_description=default_description,
             test_summary=summary,
             figures=figures,
         )
+
     else:
-        revision_name =
-        description = default_description
+        revision_name = DEFAULT_REVISION_NAME
+        description = md_to_html(default_description, mathml=True)
 
     return {
-        "content_id": f"
+        "content_id": f"{prefix}:{test_id}::{revision_name}",
         "text": description,
     }
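The reworked `get_description_metadata` chooses between the AI-generated and default description with three checks: there must be a summary or figures to interpret, the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` flag must not be `0`/`false`, and `is_configured()` must have successfully pinged an LLM. A simplified standalone sketch of that gating and of the resulting `content_id` layout (it mirrors the diff rather than importing the package):

```python
import os

AI_REVISION_NAME = "Generated by ValidMind AI"
DEFAULT_REVISION_NAME = "Default Description"


def pick_revision(summary, figures, llm_available):
    """Mirror of the gating above: use the AI description only when there is
    something to interpret, the env flag is not disabled, and an LLM responds."""
    env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in ["0", "false"]
    if (summary or figures) and not env_disabled and llm_available:
        return AI_REVISION_NAME
    return DEFAULT_REVISION_NAME


revision = pick_revision(summary={"rows": 10}, figures=None, llm_available=False)
content_id = f"metric_description:validmind.data_validation.ClassImbalance::{revision}"
print(content_id)
# -> metric_description:validmind.data_validation.ClassImbalance::Default Description
```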
validmind/vm_models/test/result_wrapper.py
CHANGED
@@ -7,7 +7,6 @@ Result Wrappers for test and metric results
 """
 import asyncio
 import json
-import os
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union
@@ -19,7 +18,7 @@ from ... import api_client
 from ...ai import DescriptionFuture
 from ...input_registry import input_registry
 from ...logging import get_logger
-from ...utils import NumpyEncoder, display,
+from ...utils import AI_REVISION_NAME, NumpyEncoder, display, run_async, test_id_to_name
 from ..dataset import VMDataset
 from ..figure import Figure
 from .metric_result import MetricResult
@@ -31,31 +30,35 @@ logger = get_logger(__name__)
 
 
 async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
-    """
-
-
-
-    """
-    should_update = False
-
-    # check if the env variable is set to force overwriting metadata
-    if os.environ.get("VM_OVERRIDE_METADATA", "false").lower() == "true":
-        should_update = True
+    """Create or Update a Metadata Object"""
+    parts = content_id.split("::")
+    content_id = parts[0]
+    revision_name = parts[1] if len(parts) > 1 else None
 
-    #
-
-    # we always want composite metric definitions to be updated
-        should_update = True
+    # we always want composite metric definitions to be updated
+    should_update = content_id.startswith("composite_metric_def:")
 
-    # if
-    if
+    # if we are updating a metric or test description, we check if the text
+    # has changed from the last time it was logged, and only update if it has
+    if content_id.split(":", 1)[0] in ["metric_description", "test_description"]:
         try:
-            await api_client.get_metadata(content_id)
-
-            #
+            md = await api_client.get_metadata(content_id)
+            # if there is an existing description, only update it if the new one
+            # is different and is an AI-generated description
+            should_update = (
+                md["text"] != text if revision_name == AI_REVISION_NAME else False
+            )
+            logger.debug(f"Check if description has changed: {should_update}")
+        except Exception:
+            # if exception, assume its not created yet TODO: don't catch all
            should_update = True
 
     if should_update:
+        if revision_name:
+            content_id = f"{content_id}::{revision_name}"
+
+        logger.debug(f"Updating metadata for `{content_id}`")
+
         await api_client.log_metadata(content_id, text, _json)
 
 
@@ -102,12 +105,6 @@ class ResultWrapper(ABC):
 
         return self.to_widget()
 
-    def _markdown_description_to_html(self, description: str):
-        """
-        Convert a markdown string to html
-        """
-        return md_to_html(description)
-
     def _summary_tables_to_widget(self, summary: ResultSummary):
         """
         Create an ipywdiget representation of the summary tables
@@ -277,9 +274,7 @@ class MetricResultWrapper(ResultWrapper):
             metric_description = metric_description.get_description()
             self.result_metadata[0]["text"] = metric_description
 
-        vbox_children.append(
-            HTML(value=self._markdown_description_to_html(metric_description))
-        )
+        vbox_children.append(HTML(value=metric_description))
 
         if self.metric:
             if self.output_template:
@@ -464,9 +459,7 @@ class ThresholdTestResultWrapper(ResultWrapper):
             metric_description = metric_description.get_description()
             self.result_metadata[0]["text"] = metric_description
 
-        description_html.append(
-            self._markdown_description_to_html(metric_description)
-        )
+        description_html.append(metric_description)
 
         description_html.append(
             f"""
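`update_metadata` now carries the revision name on the end of the content ID and strips it off before querying the API, so only AI-generated descriptions whose text actually changed get re-logged. A tiny sketch of just the ID parsing, matching the split at the top of the function:

```python
def split_content_id(content_id: str):
    """Separate the revision name (after '::') from the base content ID."""
    parts = content_id.split("::")
    return parts[0], parts[1] if len(parts) > 1 else None


base, revision = split_content_id(
    "metric_description:validmind.model_validation.Accuracy::Generated by ValidMind AI"
)
print(base)      # metric_description:validmind.model_validation.Accuracy
print(revision)  # Generated by ValidMind AI
```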
validmind/vm_models/test_suite/runner.py
CHANGED
@@ -145,14 +145,17 @@ class TestSuiteRunner:
 
         await asyncio.sleep(0.5)
 
-    def summarize(self):
+    def summarize(self, show_link: bool = True):
         if not is_notebook():
             return logger.info("Test suite done...")
 
+        self.pbar_description.value = "Collecting test results..."
+
         summary = TestSuiteSummary(
             title=self.suite.title,
             description=self.suite.description,
             sections=self.suite.sections,
+            show_link=show_link,
         )
         summary.display()
 
@@ -181,6 +184,6 @@ class TestSuiteRunner:
             run_async(self.log_results)
             run_async_check(self._check_progress)
 
-        self.summarize()
+        self.summarize(show_link=send)
 
         self._stop_progress_bar()
validmind/vm_models/test_suite/summary.py
CHANGED
@@ -35,8 +35,14 @@ class TestSuiteSectionSummary:
         self._build_summary()
 
     def _add_description(self):
-
-
+        if not self.description:
+            return
+
+        self._widgets.append(
+            widgets.HTML(
+                value=f'<div class="result">{md_to_html(self.description)}</div>'
+            )
+        )
 
     def _add_tests_summary(self):
         children = []
@@ -45,9 +51,9 @@ class TestSuiteSectionSummary:
         for test in self.tests:
             children.append(test.result.to_widget())
             titles.append(
-                f"❌ {test.result.name}: {test.
+                f"❌ {test.result.name}: {test.name} ({test.test_id})"
                 if isinstance(test.result, FailedResultWrapper)
-                else f"{test.result.name}: {test.
+                else f"{test.result.name}: {test.name} ({test.test_id})"
             )
 
         self._widgets.append(widgets.Accordion(children=children, titles=titles))
@@ -71,6 +77,7 @@ class TestSuiteSummary:
     title: str
     description: str
     sections: List[TestSuiteSection]
+    show_link: bool = True
 
     _widgets: List[widgets.Widget] = None
 
@@ -100,8 +107,11 @@ class TestSuiteSummary:
         self._widgets.append(widgets.HTML(value=results_link))
 
     def _add_description(self):
-
-
+        self._widgets.append(
+            widgets.HTML(
+                value=f'<div class="result">{md_to_html(self.description)}</div>'
+            )
+        )
 
     def _add_sections_summary(self):
         children = []
@@ -145,7 +155,8 @@ class TestSuiteSummary:
         self._widgets = []
 
         self._add_title()
-        self.
+        if self.show_link:
+            self._add_results_link()
         self._add_description()
         if len(self.sections) == 1:
             self._add_top_level_section_summary()
validmind/vm_models/test_suite/test.py
CHANGED
@@ -21,6 +21,7 @@ class TestSuiteTest:
 
     test_id: str
     output_template: str = None
+    name: str = None
 
     _test_class: Test = None
     _test_instance: Test = None
@@ -39,6 +40,8 @@ class TestSuiteTest:
             self.test_id = test_id_or_obj["id"]
             self.output_template = test_id_or_obj.get("output_template")
 
+        self.name = test_id_to_name(self.test_id)
+
         try:
             self._test_class = load_test_class(self.test_id)
         except LoadTestError as e:
@@ -52,14 +55,6 @@ class TestSuiteTest:
             # since _test_class is None
             logger.error(f"Failed to load test '{self.test_id}': {e}")
 
-    @property
-    def title(self):
-        return test_id_to_name(self.test_id)
-
-    @property
-    def name(self):
-        return self._test_class.name
-
     @property
     def test_type(self):
         return self._test_class.test_type
@@ -86,12 +81,12 @@ class TestSuiteTest:
             )
         except Exception as e:
             logger.error(
-                f"Failed to load test '{self.
+                f"Failed to load test '{self.test_id}': "
                 f"({e.__class__.__name__}) {e}"
             )
             self.result = FailedResultWrapper(
                 error=e,
-                message=f"Failed to load test '{self.
+                message=f"Failed to load test '{self.name}'",
                 result_id=self.test_id,
             )
 
@@ -107,7 +102,7 @@ class TestSuiteTest:
             # run the test and log the performance if LOG_LEVEL is set to DEBUG
             log_performance(
                 func=self._test_instance.run,
-                name=self.
+                name=self.test_id,
                 logger=logger,
             )()  # this is a decorator so we need to call it
 
@@ -116,14 +111,13 @@ class TestSuiteTest:
                 raise e  # Re-raise the exception if we are in fail fast mode
 
             logger.error(
-                f"Failed to run test '{self.
-                f"({e.__class__.__name__}) {e}"
+                f"Failed to run test '{self.test_id}': " f"({e.__class__.__name__}) {e}"
             )
             self.result = FailedResultWrapper(
                 name=f"Failed {self._test_instance.test_type}",
                 error=e,
-                message=f"Failed to run '{self.
-                result_id=self.
+                message=f"Failed to run '{self.name}'",
+                result_id=self.test_id,
             )
 
             return
@@ -132,8 +126,8 @@ class TestSuiteTest:
             self.result = FailedResultWrapper(
                 name=f"Failed {self._test_instance.test_type}",
                 error=None,
-                message=f"'{self.
-                result_id=self.
+                message=f"'{self.name}' did not return a result",
+                result_id=self.test_id,
             )
 
             return
@@ -142,9 +136,8 @@ class TestSuiteTest:
             self.result = FailedResultWrapper(
                 name=f"Failed {self._test_instance.test_type}",
                 error=None,
-                message=f"
-
-                result_id=self._test_instance.name,
+                message=f"{self.name} returned an invalid result: {self._test_instance.result}",
+                result_id=self.test_id,
             )
 
             return
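TestSuiteTest now resolves a human-readable `name` once in `__init__` via `test_id_to_name` instead of exposing `title`/`name` properties, so failure messages keep working even when the test class cannot be loaded. The real converter lives in validmind.utils; the standalone stand-in below is illustrative only and may differ from the actual implementation:

```python
import re


def test_id_to_name(test_id: str) -> str:
    # illustrative only: take the last segment of the dotted ID and
    # split the CamelCase class name into words
    last = test_id.split(".")[-1]
    return " ".join(re.findall(r"[A-Z][a-z0-9]*|[a-z0-9]+", last))


print(test_id_to_name("validmind.data_validation.ClassImbalance"))
# -> Class Imbalance
```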
{validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 validmind/__init__.py,sha256=XqPjCbFMvEYl0cIT42EZKP7DFMYDC7KDW6syo8MGkDg,3682
-validmind/__version__.py,sha256=
-validmind/ai.py,sha256=
-validmind/api_client.py,sha256=
+validmind/__version__.py,sha256=qzqxcwWCwWgKw_eJA2nZPycPzwfpaSjAKO3MwNvDqgw,22
+validmind/ai.py,sha256=Uc09ulMZhu0VgbdZtHlRuzRg1QeCHVXJMXmZd6dbyEQ,9071
+validmind/api_client.py,sha256=kIEO515kp_l5LA_QyRgHOumYaOIMSrCnl9Nj4Rm5TK8,15948
 validmind/client.py,sha256=S_FozHlMJBgF8IQJES27LeFoYcoCcGZ6dkxE8adyIRQ,18607
 validmind/client_config.py,sha256=58L6s6-9vFWC9vkSs_98CjV1YWmlksdhblJtPQxQsAk,1611
 validmind/datasets/__init__.py,sha256=oYfcvW7BAyUgpghBOnTeGbQF6tpFAWg38rRirdLr8m8,262
@@ -59,7 +59,7 @@ validmind/errors.py,sha256=qy7Gp6Uom5J6WmLw-CpE5zaTN96SiN7kJjDGBaJdoxY,8023
 validmind/html_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 validmind/html_templates/content_blocks.py,sha256=AHQ5MlhR1JYldel7soo5ztpTJJ5-kYtyKPBmh-vwxuI,3997
 validmind/input_registry.py,sha256=zexO3x-vncaoWvQ6VfkvgDLn6x72e2BNel_jCbrVHSE,793
-validmind/logging.py,sha256=
+validmind/logging.py,sha256=J1Y1dYCH1dtkoYCHoXMOQH_B7EO4fJytWRDrDqZZz8U,5204
 validmind/models/__init__.py,sha256=lraTbNwoKckXNP3Dbyj-euI78UTkZ_w5wpUOb8l5nWs,729
 validmind/models/foundation.py,sha256=LSUdpnBYlPiOUVrTyofStPdoR6y0_nqJoM9TiYT1MRo,1758
 validmind/models/function.py,sha256=loZoheqGyTvHze1XROEX1aqXgM08kPMr67X1nutaaeU,1629
@@ -259,7 +259,7 @@ validmind/tests/prompt_validation/NegativeInstruction.py,sha256=1aqNV_vB5oM2_8UX
 validmind/tests/prompt_validation/Robustness.py,sha256=VIQotugWQ32Q1kr1kacBuqk-q1EPTRi9NZAIYrTDsY0,6826
 validmind/tests/prompt_validation/Specificity.py,sha256=v823rZAr9a810Q_RlgH7FqPPxXZ00hDJApkFaJJ8mgk,6116
 validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-validmind/tests/prompt_validation/ai_powered_test.py,sha256=
+validmind/tests/prompt_validation/ai_powered_test.py,sha256=qE3OXU0Db3z7KNqHMWZE8e78BeGg6pB0IozSzDNXIdc,2945
 validmind/tests/test_providers.py,sha256=1tYn_sWNqifFpOp8eNvcVyJzxBjhHV5Py4FxO8opPZA,4944
 validmind/unit_metrics/__init__.py,sha256=a7oV8YRC-O6dF7ePz4E8Fqrh4ax6AWT26Y996VPView,7084
 validmind/unit_metrics/classification/sklearn/Accuracy.py,sha256=2Ra_OpKceY01h1dAFCqRFAwe--K2oVbCUiYjM5AH_nQ,480
@@ -279,7 +279,7 @@ validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py,sha256=LCNgpDw6FB
 validmind/unit_metrics/regression/sklearn/MeanSquaredError.py,sha256=7UQnDTTO7yRRyMe3Zac9ZyjEbbD8pW_8WnZwHdVB_8U,463
 validmind/unit_metrics/regression/sklearn/RSquaredScore.py,sha256=h9U5ndtnJfNNtKPZIo5n3KRp-m4akQcEo0t1iSwjVzY,420
 validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py,sha256=_5IQIU9jNfmTE4NLJvaRWXbudRGV2PS7nYF5e4fkSMY,556
-validmind/utils.py,sha256=
+validmind/utils.py,sha256=ZQ016Cbgc_hrQb2HZ7s9KH80fDncnQZXFwa9oi8JO8g,16931
 validmind/vm_models/__init__.py,sha256=lmWCD2u4tW6_AH39UnJ24sCcMUcsHbUttz7SaZfrh3s,1168
 validmind/vm_models/dataset/__init__.py,sha256=U4CxZjdoc0dd9u2AqBl5PJh1UVbzXWNrmundmjLF-qE,346
 validmind/vm_models/dataset/dataset.py,sha256=VlR5Wp5pCoXY3U0C8AbevaySFGf0KJ3QIK3go5OEbog,21843
@@ -290,17 +290,17 @@ validmind/vm_models/test/metric.py,sha256=R7Y-_fzBcIrkJw7-BeifQHMuHTV3HLDc8T3nS_
 validmind/vm_models/test/metric_result.py,sha256=Bak4GDrMlNq5NtgP5exwlPsKZgz3tWgtC6jZqtHjvqM,1987
 validmind/vm_models/test/output_template.py,sha256=njqCAMyLxwadkCWhACVskyL9-psTgmUysaeeirTVAX4,1500
 validmind/vm_models/test/result_summary.py,sha256=QJcIKJUeBf5wW3lyue6ctsi1jKSyoiAIfmjudGJiJtc,2028
-validmind/vm_models/test/result_wrapper.py,sha256=
+validmind/vm_models/test/result_wrapper.py,sha256=an310hWJpVvWDrVSFvjTDUBDSE4XJ0aDliSVnKsgZaQ,17611
 validmind/vm_models/test/test.py,sha256=434PqhPcbwfCmNjYVwHGMG-rViIatb9-1nmxkdZF8Xo,3104
-validmind/vm_models/test/threshold_test.py,sha256=
+validmind/vm_models/test/threshold_test.py,sha256=7d46Z5N_U1hTr6LGa2A0_ZuaIFl54xZ_eRzgf-KUGjk,3662
 validmind/vm_models/test/threshold_test_result.py,sha256=EXP-g_e3NsnpkvNgYew030qVUoY6ZTHyuuFUXaq-BuM,1954
 validmind/vm_models/test_context.py,sha256=AN7-atBgOcD04MLVitCFJYooxF6_iNmvI2H4nkv32iw,9035
-validmind/vm_models/test_suite/runner.py,sha256=
-validmind/vm_models/test_suite/summary.py,sha256=
-validmind/vm_models/test_suite/test.py,sha256=
+validmind/vm_models/test_suite/runner.py,sha256=uDt1eo3sHUXXV-ZN_gJUKR-0Hp5RNtUcDgKHQXtLf7s,6893
+validmind/vm_models/test_suite/summary.py,sha256=co-xJJMUYGb7cOiVmw0i8vpZlfiMqrWjaCOmHKMAbcE,4686
+validmind/vm_models/test_suite/test.py,sha256=_GfbK36l98SjzgVcucmp0OKBJKqMW3neO7SqJ3EWeps,5049
 validmind/vm_models/test_suite/test_suite.py,sha256=Cns2wL54v0T5Mv5_HJb3kMeaa4rtycdqT8KxK9_rWEU,6279
-validmind-2.2.
-validmind-2.2.
-validmind-2.2.
-validmind-2.2.
-validmind-2.2.
+validmind-2.2.6.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
+validmind-2.2.6.dist-info/METADATA,sha256=x00vy4OCVq0TNkM2jjt2Jzl-1FwH-I96zzIuoHfXRHU,3911
+validmind-2.2.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+validmind-2.2.6.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
+validmind-2.2.6.dist-info/RECORD,,
{validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/LICENSE
File without changes
{validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/WHEEL
File without changes
{validmind-2.2.4.dist-info → validmind-2.2.6.dist-info}/entry_points.txt
File without changes