PyPI - validmind - Versions diffs - 2.2.2__tar.gz → 2.2.4__tar.gz - Mend

validmind 2.2.2__tar.gz → 2.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (306)

{validmind-2.2.2 → validmind-2.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 2.2.2
+Version: 2.2.4
 Summary: ValidMind Developer Framework
 License: Commercial License
 Author: Andres Rodriguez

{validmind-2.2.2 → validmind-2.2.4}/pyproject.toml RENAMED Viewed

@@ -10,7 +10,7 @@ description = "ValidMind Developer Framework"
 license = "Commercial License"
 name = "validmind"
 readme = "README.pypi.md"
-version = "2.2.2"
+version = "2.2.4"
 [tool.poetry.dependencies]
 python = ">=3.8.1,<3.12"

validmind-2.2.4/validmind/__version__.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "2.2.4"

{validmind-2.2.2 → validmind-2.2.4}/validmind/ai.py RENAMED Viewed

@@ -8,45 +8,65 @@ import os
 from openai import AzureOpenAI, OpenAI
 SYSTEM_PROMPT = """
-You are an expert data scientist and MRM specialist tasked with providing concise and'
-objective insights based on the results of quantitative model or dataset analysis.
+You are an expert data scientist and MRM specialist.
+You are tasked with analyzing the results of a quantitative test run on some model or dataset.
+Your goal is to create a test description that will act as part of the model documentation.
+You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
+The overarching theme to maintain is MRM documentation.
-Examine the provided statistical test results and compose a brief summary. Highlight crucial
-insights, focusing on the distribution characteristics, central tendencies (such as mean or median),
-and the variability (including standard deviation and range) of the metrics. Evaluate how
-these statistics might influence the development and performance of a predictive model. Identify
-and explain any discernible trends or anomalies in the test results.
-Your analysis will act as the description of the result in the model documentation.
+Examine the provided statistical test results and compose a description of the results.
+This will act as the description and interpretation of the result in the model documentation.
+It will be displayed alongside the test results table and figures.
 Avoid long sentences and complex vocabulary.
 Structure the response clearly and logically.
-Use valid Markdown syntax to format the response (tables are supported).
+Use valid Markdown syntax to format the response.
+Respond only with your analysis and insights, not the verbatim test results.
+Respond only with the markdown content, no explanation or context for your response is necessary.
 Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
+Explain the test, its purpose, its mechanism/formula etc and why it is useful.
+If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
+Highlight the key insights from the test results. The key insights should be concise and easily understood.
+End the response with any closing remarks, summary or additional useful information.
 Use the following format for the response (feel free to modify slightly if necessary):
 ```
-**<Test Name>** <continue to explain what it does in detail>...
+**<Test Name>** calculates the xyz <continue to explain what it does in detail>...
+This test is useful for <explain why and for what this test is useful>...
-The results of this test <detailed explanation of the results>...
+**Key Insights:**
-In summary the following key insights can be gained:
+The following key insights can be identified in the test results:
-- **<key insight 1 - title>**: <explanation of key insight 1>
+- **<key insight 1 - title>**: <concise explanation of key insight 1>
 - ...<continue with any other key insights using the same format>
 ```
 It is very important that the text is nicely formatted and contains enough information to be useful to the user as documentation.
 """.strip()
 USER_PROMPT = """
-Test ID: {test_name}
-Test Description: {test_description}
-Test Results (the raw results of the test):
-{test_results}
-Test Summary (what the user sees in the documentation):
+Test ID: `{test_name}`
+<Test Docstring>
+{test_description}
+</Test Docstring>
+<Test Results Summary>
 {test_summary}
+</Test Results Summary>
 """.strip()
 USER_PROMPT_FIGURES = """
-Test ID: {test_name}
-Test Description: {test_description}
+Test ID: `{test_name}`
+<Test Docstring>
+{test_description}
+</Test Docstring>
 The attached plots show the results of the test.
 """.strip()
@@ -113,21 +133,40 @@ class DescriptionFuture:
 def generate_description_async(
     test_name: str,
     test_description: str,
-    test_results: str,
     test_summary: str,
     figures: list = None,
 ):
     """Generate the description for the test results"""
-    client, _ = __get_client_and_model()
+    if not test_summary and not figures:
+        raise ValueError("No summary or figures provided - cannot generate description")
+    client, _ = __get_client_and_model()
     # get last part of test id
     test_name = test_name.split(".")[-1]
-    if not test_results and not test_summary:
-        if not figures:
-            raise ValueError("No results, summary or figures provided")
+    if test_summary:
+        return (
+            client.chat.completions.create(
+                model="gpt-4o",
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {
+                        "role": "user",
+                        "content": USER_PROMPT.format(
+                            test_name=test_name,
+                            test_description=test_description,
+                            test_summary=test_summary,
+                        ),
+                    },
+                ],
+            )
+            .choices[0]
+            .message.content.strip("```")
+            .strip()
+        )
-        response = client.chat.completions.create(
+    return (
+        client.chat.completions.create(
             model="gpt-4o",
             messages=[
                 {"role": "system", "content": SYSTEM_PROMPT},
@@ -154,30 +193,15 @@ def generate_description_async(
                 },
             ],
         )
-    else:
-        response = client.chat.completions.create(
-            model="gpt-4o",
-            messages=[
-                {"role": "system", "content": SYSTEM_PROMPT},
-                {
-                    "role": "user",
-                    "content": USER_PROMPT.format(
-                        test_name=test_name,
-                        test_description=test_description,
-                        test_results=test_results,
-                        test_summary=test_summary,
-                    ),
-                },
-            ],
-        )
-    return response.choices[0].message.content.strip("```").strip()
+        .choices[0]
+        .message.content.strip("```")
+        .strip()
+    )
 def generate_description(
     test_name: str,
     test_description: str,
-    test_results: str,
     test_summary: str,
     figures: list = None,
 ):
@@ -185,7 +209,6 @@ def generate_description(
         generate_description_async,
         test_name,
         test_description,
-        test_results,
         test_summary,
         figures,
     )

{validmind-2.2.2 → validmind-2.2.4}/validmind/api_client.py RENAMED Viewed

@@ -161,14 +161,20 @@ def __ping() -> Dict[str, Any]:
     init_sentry(client_info.get("sentry_config", {}))
+    # Only show this confirmation the first time we connect to the API
+    ack_connected = False
+    if client_config.project is None:
+        ack_connected = True
     client_config.project = client_info["project"]
     client_config.documentation_template = client_info.get("documentation_template", {})
     client_config.feature_flags = client_info.get("feature_flags", {})
-    logger.info(
-        f"Connected to ValidMind. Project: {client_config.project['name']}"
-        f" ({client_config.project['cuid']})"
-    )
+    if ack_connected:
+        logger.info(
+            f"Connected to ValidMind. Project: {client_config.project['name']}"
+            f" ({client_config.project['cuid']})"
+        )
 def reload():
@@ -358,7 +364,11 @@ async def log_metadata(
 async def log_metrics(
-    metrics: List[MetricResult], inputs: List[str], output_template: str = None
+    metrics: List[MetricResult],
+    inputs: List[str],
+    output_template: str = None,
+    section_id: str = None,
+    position: int = None,
 ) -> Dict[str, Any]:
     """Logs metrics to ValidMind API.
@@ -366,6 +376,8 @@ async def log_metrics(
         metrics (list): A list of MetricResult objects
         inputs (list): A list of input keys (names) that were used to run the test
         output_template (str): The optional output template for the test
+        section_id (str): The section ID add a test driven block to the documentation
+        position (int): The position in the section to add the test driven block
     Raises:
         Exception: If the API call fails
@@ -373,7 +385,14 @@ async def log_metrics(
     Returns:
         dict: The response from the API
     """
+    params = {}
+    if section_id:
+        params["section_id"] = section_id
+    if position is not None:
+        params["position"] = position
     data = []
     for metric in metrics:
         metric_data = {
             **metric.serialize(),
@@ -388,6 +407,7 @@ async def log_metrics(
     try:
         return await _post(
             "log_metrics",
+            params=params,
             data=json.dumps(data, cls=NumpyEncoder, allow_nan=False),
         )
     except Exception as e:
@@ -396,7 +416,10 @@ async def log_metrics(
 async def log_test_result(
-    result: ThresholdTestResults, inputs: List[str], dataset_type: str = "training"
+    result: ThresholdTestResults,
+    inputs: List[str],
+    section_id: str = None,
+    position: int = None,
 ) -> Dict[str, Any]:
     """Logs test results information
@@ -406,8 +429,8 @@ async def log_test_result(
     Args:
         result (validmind.ThresholdTestResults): A ThresholdTestResults object
         inputs (list): A list of input keys (names) that were used to run the test
-        dataset_type (str, optional): The type of dataset. Can be one of
-            "training", "test", or "validation". Defaults to "training".
+        section_id (str, optional): The section ID add a test driven block to the documentation
+        position (int): The position in the section to add the test driven block
     Raises:
         Exception: If the API call fails
@@ -415,10 +438,16 @@ async def log_test_result(
     Returns:
         dict: The response from the API
     """
+    params = {}
+    if section_id:
+        params["section_id"] = section_id
+    if position is not None:
+        params["position"] = position
     try:
         return await _post(
             "log_test_results",
-            params={"dataset_type": dataset_type},
+            params=params,
             data=json.dumps(
                 {
                     **result.serialize(),
@@ -434,7 +463,7 @@ async def log_test_result(
 def log_test_results(
-    results: List[ThresholdTestResults], inputs, dataset_type: str = "training"
+    results: List[ThresholdTestResults], inputs
 ) -> List[Callable[..., Dict[str, Any]]]:
     """Logs test results information
@@ -444,8 +473,6 @@ def log_test_results(
     Args:
         results (list): A list of ThresholdTestResults objects
         inputs (list): A list of input keys (names) that were used to run the test
-        dataset_type (str, optional): The type of dataset. Can be one of "training",
-          "test", or "validation". Defaults to "training".
     Raises:
         Exception: If the API call fails
@@ -456,7 +483,7 @@ def log_test_results(
     try:
         responses = []  # TODO: use asyncio.gather
         for result in results:
-            responses.append(run_async(log_test_result, result, inputs, dataset_type))
+            responses.append(run_async(log_test_result, result, inputs))
     except Exception as e:
         logger.error("Error logging test results to ValidMind API")
         raise e

{validmind-2.2.2 → validmind-2.2.4}/validmind/errors.py RENAMED Viewed

@@ -339,7 +339,7 @@ def raise_api_error(error_string):
     try:
         json_response = json.loads(error_string)
         api_code = json_response.get("code")
-        api_description = json_response.get("description")
+        api_description = json_response.get("description", json_response.get("message"))
     except json.decoder.JSONDecodeError:
         api_code = "unknown"
         api_description = error_string

{validmind-2.2.2 → validmind-2.2.4}/validmind/tests/__init__.py RENAMED Viewed

@@ -388,7 +388,7 @@ def describe_test(test_id: str = None, raw: bool = False, show: bool = True):
         ),
         table_display="table" if details["Params"] else "none",
         example_inputs=json.dumps(
-            {name: f"my_vm_{name}" for name in details["Required Inputs"]},
+            {name: f"my_vm_{name}" for name in (details["Required Inputs"] or [])},
             indent=4,
         ),
         example_params=json.dumps(details["Params"] or {}, indent=4, cls=NumpyEncoder),

{validmind-2.2.2 → validmind-2.2.4}/validmind/tests/decorator.py RENAMED Viewed

@@ -15,6 +15,7 @@ import pandas as pd
 from validmind.errors import MissingRequiredTestInputError
 from validmind.logging import get_logger
+from validmind.utils import get_description_metadata
 from validmind.vm_models import (
     Metric,
     MetricResult,
@@ -113,20 +114,24 @@ def _build_result(results, test_id, description, output_template, inputs):  # no
     else:
         process_item(results)
+    result_summary = ResultSummary(results=tables)
     return MetricResultWrapper(
         result_id=test_id,
         metric=MetricResult(
             key=test_id,
             ref_id=ref_id,
             value="Empty",
-            summary=ResultSummary(results=tables),
+            summary=result_summary,
         ),
         figures=figures,
         result_metadata=[
-            {
-                "content_id": f"metric_description:{test_id}",
-                "text": description,
-            }
+            get_description_metadata(
+                test_id=test_id,
+                default_description=description,
+                summary=result_summary.serialize(),
+                figures=figures,
+            )
         ],
         inputs=inputs,
         output_template=output_template,

{validmind-2.2.2 → validmind-2.2.4}/validmind/unit_metrics/composite.py RENAMED Viewed

@@ -8,7 +8,7 @@ from uuid import uuid4
 from ..logging import get_logger
 from ..tests.decorator import _inspect_signature
-from ..utils import run_async, test_id_to_name
+from ..utils import get_description_metadata, run_async, test_id_to_name
 from ..vm_models.test.metric import Metric
 from ..vm_models.test.metric_result import MetricResult
 from ..vm_models.test.result_summary import ResultSummary, ResultTable
@@ -200,13 +200,15 @@ def run_metrics(
         </style>
         """
+    result_summary = ResultSummary(results=[ResultTable(data=[results])])
     result_wrapper = MetricResultWrapper(
         result_id=test_id,
         result_metadata=[
-            {
-                "content_id": f"metric_description:{test_id}",
-                "text": description,
-            },
+            get_description_metadata(
+                test_id=test_id,
+                default_description=description,
+                summary=result_summary.serialize(),
+            ),
             {
                 "content_id": f"composite_metric_def:{test_id}:unit_metrics",
                 "json": metric_ids,
@@ -222,7 +224,7 @@ def run_metrics(
             key=test_id,
             ref_id=str(uuid4()),
             value=results,
-            summary=ResultSummary(results=[ResultTable(data=[results])]),
+            summary=result_summary,
         ),
     )

{validmind-2.2.2 → validmind-2.2.4}/validmind/utils.py RENAMED Viewed

@@ -6,6 +6,7 @@ import asyncio
 import difflib
 import json
 import math
+import os
 import re
 import sys
 from platform import python_version
@@ -25,6 +26,7 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
 from numpy import ndarray
 from tabulate import tabulate
+from .ai import generate_description
 from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
 DEFAULT_BIG_NUMBER_DECIMALS = 2
@@ -432,7 +434,9 @@ def display(widget_or_html, syntax_highlighting=True, mathjax=True):
 def md_to_html(md: str, mathml=False) -> str:
     """Converts Markdown to HTML using mistune with plugins"""
     # use mistune with math plugin to convert to html
-    html = mistune.create_markdown(plugins=["math"])(md)
+    html = mistune.create_markdown(
+        plugins=["math", "table", "strikethrough", "footnotes"]
+    )(md)
     if not mathml:
         # return the html as is (with latex that will be rendered by MathJax)
@@ -453,3 +457,42 @@ def md_to_html(md: str, mathml=False) -> str:
     )
     return html
+def get_description_metadata(test_id, default_description, summary=None, figures=None):
+    """Get Metadata Dictionary for a Test or Metric Result
+    Generates an LLM interpretation of the test results or uses the default
+    description and returns a metadata object that can be logged with the test results.
+    To enable LLM-generated descriptions, set the VALIDMIND_LLM_DESCRIPTIONS_ENABLED
+    environment variable to "true". The default description will be used if LLM
+    descriptions are disabled.
+    Note: Either the summary or figures must be provided to generate the description.
+    Args:
+        test_id (str): The test ID
+        default_description (str): The default description for the test
+        summary (Any): The test summary or results to interpret
+        figures (List[Figure]): The figures to attach to the test suite result
+    Returns:
+        dict: The metadata object to be logged with the test results
+    """
+    if os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower() == "true":
+        revision_name = "Generated by ValidMind AI"
+        description = generate_description(
+            test_name=test_id,
+            test_description=default_description,
+            test_summary=summary,
+            figures=figures,
+        )
+    else:
+        revision_name = "Default Description"
+        description = default_description
+    return {
+        "content_id": f"metric_description:{test_id}::{revision_name}",
+        "text": description,
+    }

{validmind-2.2.2 → validmind-2.2.4}/validmind/vm_models/dataset/dataset.py RENAMED Viewed

@@ -114,7 +114,7 @@ class VMDataset:
         if model:
             self.assign_predictions(model)
-    def _set_feature_columns(self, feature_columns):
+    def _set_feature_columns(self, feature_columns=None):
         if feature_columns is not None and (
             not isinstance(feature_columns, list)
             or not all(isinstance(col, str) for col in feature_columns)
@@ -269,7 +269,9 @@ class VMDataset:
             column_name (str): The name of the extra column.
             column_values (np.ndarray, optional): The values of the extra column.
         """
-        if column_name not in self.columns and not column_values:
+        if column_name not in self.columns and (
+            column_values is None or len(column_values) == 0
+        ):
             raise ValueError(
                 "Column values must be provided when the column doesn't exist in the dataset"
             )

{validmind-2.2.2 → validmind-2.2.4}/validmind/vm_models/test/metric.py RENAMED Viewed

@@ -6,15 +6,14 @@
 Class for storing ValidMind metric objects and associated
 data for display and reporting purposes
 """
-import os
 from abc import abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional, Union
 import pandas as pd
-from ...ai import generate_description
 from ...errors import MissingCacheResultsArgumentsError
+from ...utils import get_description_metadata
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -83,30 +82,16 @@ class Metric(Test):
             summary=self.summary(metric_value),
         )
-        if (
-            os.environ.get("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "false").lower()
-            == "true"
-        ):
-            revision_name = "Generated by ValidMind AI"
-            description = generate_description(
-                test_name=self.test_id,
-                test_description=self.description().splitlines()[0],
-                test_results=metric.serialize()["value"],
-                test_summary=metric.serialize()["summary"],
-                figures=figures,
-            )
-        else:
-            revision_name = "Default Description"
-            description = self.description()
-        description_metadata = {
-            "content_id": f"metric_description:{self.test_id}::{revision_name}",
-            "text": description,
-        }
         self.result = MetricResultWrapper(
             result_id=self.test_id,
-            result_metadata=[description_metadata],
+            result_metadata=[
+                get_description_metadata(
+                    test_id=self.test_id,
+                    default_description=self.description(),
+                    summary=metric.serialize()["summary"],
+                    figures=figures,
+                )
+            ],
             metric=metric,
             figures=figures,
             inputs=self.get_accessed_inputs(),

validmind 2.2.2__tar.gz → 2.2.4__tar.gz

validmind 2.2.2__tar.gz → 2.2.4__tar.gz