validmind 2.2.6__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/{ai.py → ai/test_descriptions.py} +74 -82
- validmind/ai/utils.py +104 -0
- validmind/api_client.py +58 -19
- validmind/client.py +5 -5
- validmind/models/foundation.py +10 -6
- validmind/models/function.py +3 -1
- validmind/models/metadata.py +1 -1
- validmind/test_suites/__init__.py +1 -7
- validmind/test_suites/regression.py +0 -16
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
- validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
- validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
- validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
- validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
- validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
- validmind/tests/data_validation/ScatterPlot.py +1 -1
- validmind/tests/data_validation/SeasonalDecompose.py +12 -7
- validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +1 -1
- validmind/tests/data_validation/nlp/TextDescription.py +5 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/decorator.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
- validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
- validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
- validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
- validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
- validmind/tests/model_validation/ragas/utils.py +35 -9
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
- validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
- validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
- validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
- validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +14 -11
- validmind/tests/prompt_validation/Conciseness.py +14 -11
- validmind/tests/prompt_validation/Delimitation.py +14 -11
- validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
- validmind/tests/prompt_validation/Robustness.py +11 -11
- validmind/tests/prompt_validation/Specificity.py +14 -11
- validmind/tests/prompt_validation/ai_powered_test.py +53 -75
- validmind/unit_metrics/composite.py +2 -1
- validmind/utils.py +4 -63
- validmind/vm_models/dataset/dataset.py +17 -3
- validmind/vm_models/dataset/utils.py +2 -2
- validmind/vm_models/model.py +1 -1
- validmind/vm_models/test/metric.py +1 -8
- validmind/vm_models/test/result_wrapper.py +2 -2
- validmind/vm_models/test/test.py +3 -0
- validmind/vm_models/test/threshold_test.py +1 -1
- validmind/vm_models/test_suite/runner.py +7 -4
- {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
- {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/RECORD +92 -101
- validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
- validmind/tests/data_validation/PiTPDHistogram.py +0 -152
- validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
- validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
- validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
- validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
- {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
- {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
- {validmind-2.2.6.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0
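Beyond the line-count summary above, note that the stationarity tests ADF, DFGLSArch, KPSS, PhillipsPerronArch, and ZivotAndrewsArch moved from validmind/tests/model_validation/statsmodels/ to validmind/tests/data_validation/, so their test IDs now sit under the data_validation namespace. A minimal sketch of running one relocated test by its new ID; the toy dataframe and the assumption that vm.init(...) has already been configured are illustrative, not part of this diff:

import pandas as pd
import validmind as vm

# toy univariate series; assumes vm.init(...) has already been called
df = pd.DataFrame({"y": [1.0, 1.2, 0.9, 1.1, 1.3, 1.0, 1.2, 1.4]})
vm_dataset = vm.init_dataset(dataset=df, input_id="toy_ts", target_column="y")

result = vm.tests.run_test(
    "validmind.data_validation.ADF",  # previously validmind.model_validation.statsmodels.ADF
    inputs={"dataset": vm_dataset},
)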
validmind/tests/prompt_validation/NegativeInstruction.py
CHANGED
@@ -7,6 +7,7 @@ from typing import List
 
 import pandas as pd
 
+from validmind.errors import MissingRequiredTestInputError
 from validmind.vm_models import (
     ResultSummary,
     ResultTable,
@@ -15,11 +16,16 @@ from validmind.vm_models import (
     ThresholdTestResult,
 )
 
-from .ai_powered_test import
+from .ai_powered_test import (
+    call_model,
+    get_explanation,
+    get_score,
+    missing_prompt_message,
+)
 
 
 @dataclass
-class NegativeInstruction(ThresholdTest
+class NegativeInstruction(ThresholdTest):
     """
     Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
 
@@ -96,12 +102,6 @@ Prompt:
 """
 '''.strip()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)  # Call ThresholdTest.__init__
-        AIPoweredTest.__init__(
-            self, *args, **kwargs
-        )  # Explicitly call AIPoweredTest.__init__
-
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         result = results[0]
         results_table = [
@@ -125,14 +125,17 @@ Prompt:
         )
 
     def run(self):
-
+        if not hasattr(self.inputs.model, "prompt"):
+            raise MissingRequiredTestInputError(missing_prompt_message)
+
+        response = call_model(
             system_prompt=self.system_prompt,
             user_prompt=self.user_prompt.format(
                 prompt_to_test=self.inputs.model.prompt.template
             ),
         )
-        score =
-        explanation =
+        score = get_score(response)
+        explanation = get_explanation(response)
 
         passed = score > self.params["min_threshold"]
         results = [
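With this change the prompt validation tests bail out early with MissingRequiredTestInputError unless the model under test carries a prompt. A minimal sketch of attaching one, mirroring the guidance embedded in missing_prompt_message from ai_powered_test.py further down; the predict function is a stub and the Prompt import path is an assumption, not something shown in this diff:

import validmind as vm
from validmind.models import Prompt  # assumed import path for the Prompt dataclass

def generate(input_text):
    # stub predict_fn; a real model would call an LLM here
    return "stub response"

my_vm_model = vm.init_model(
    predict_fn=generate,
    prompt=Prompt(
        template="Summarize the following text:\n{text}",
        variables=["text"],
    ),
    input_id="my_llm_model",
)

Any prompt validation test run against my_vm_model will then find model.prompt and proceed instead of raising.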
validmind/tests/prompt_validation/Robustness.py
CHANGED
@@ -7,7 +7,7 @@ from typing import List
 
 import pandas as pd
 
-from validmind.errors import SkipTestError
+from validmind.errors import MissingRequiredTestInputError, SkipTestError
 from validmind.vm_models import (
     ResultSummary,
     ResultTable,
@@ -16,11 +16,11 @@ from validmind.vm_models import (
     ThresholdTestResult,
 )
 
-from .ai_powered_test import
+from .ai_powered_test import call_model, missing_prompt_message
 
 
 @dataclass
-class Robustness(ThresholdTest
+class Robustness(ThresholdTest):
     """
     Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts.
 
@@ -94,12 +94,6 @@ Prompt:
 Input:
 '''.strip()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)  # Call ThresholdTest.__init__
-        AIPoweredTest.__init__(
-            self, *args, **kwargs
-        )  # Explicitly call AIPoweredTest.__init__
-
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         results_table = [
             {
@@ -122,8 +116,14 @@ Input:
         )
 
     def run(self):
+        if not hasattr(self.inputs.model, "prompt"):
+            raise MissingRequiredTestInputError(missing_prompt_message)
+
         # TODO: add support for multi-variable prompts
-        if
+        if (
+            not self.inputs.model.prompt.variables
+            or len(self.inputs.model.prompt.variables) > 1
+        ):
             raise SkipTestError(
                 "Robustness only supports single-variable prompts for now"
             )
@@ -138,7 +138,7 @@ Input:
         results = []
 
         for _ in range(self.params["num_tests"]):
-            response =
+            response = call_model(
                 system_prompt=self.system_prompt,
                 user_prompt=self.user_prompt.format(
                     variables="\n".join(self.inputs.model.prompt.variables),
validmind/tests/prompt_validation/Specificity.py
CHANGED
@@ -7,6 +7,7 @@ from typing import List
 
 import pandas as pd
 
+from validmind.errors import MissingRequiredTestInputError
 from validmind.vm_models import (
     ResultSummary,
     ResultTable,
@@ -15,11 +16,16 @@ from validmind.vm_models import (
     ThresholdTestResult,
 )
 
-from .ai_powered_test import
+from .ai_powered_test import (
+    call_model,
+    get_explanation,
+    get_score,
+    missing_prompt_message,
+)
 
 
 @dataclass
-class Specificity(ThresholdTest
+class Specificity(ThresholdTest):
     """
     Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity,
     detail, and relevance.
@@ -91,12 +97,6 @@ Prompt:
 """
 '''.strip()
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)  # Call ThresholdTest.__init__
-        AIPoweredTest.__init__(
-            self, *args, **kwargs
-        )  # Explicitly call AIPoweredTest.__init__
-
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         result = results[0]
         results_table = [
@@ -120,14 +120,17 @@ Prompt:
         )
 
     def run(self):
-
+        if not hasattr(self.inputs.model, "prompt"):
+            raise MissingRequiredTestInputError(missing_prompt_message)
+
+        response = call_model(
             system_prompt=self.system_prompt,
             user_prompt=self.user_prompt.format(
                 prompt_to_test=self.inputs.model.prompt.template
            ),
        )
-        score =
-        explanation =
+        score = get_score(response)
+        explanation = get_explanation(response)
 
         passed = score > self.params["min_threshold"]
         results = [
validmind/tests/prompt_validation/ai_powered_test.py
CHANGED
@@ -2,90 +2,68 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-import os
 import re
 
-from
+from validmind.ai.utils import get_client_and_model
+
+missing_prompt_message = """
+Cannot run prompt validation tests on a model with no prompt.
+You can set a prompt when creating a vm_model object like this:
+my_vm_model = vm.init_model(
+    predict_fn=call_model,
+    prompt=Prompt(
+        template="<your-prompt-here>",
+        variables=[],
+    ),
+    input_id="my_llm_model",
+)
+"""
+
+
+def call_model(
+    system_prompt: str, user_prompt: str, temperature: float = 0.0, seed: int = 42
+):
+    """Call LLM with the given prompts and return the response"""
+    client, model = get_client_and_model()
+
+    return (
+        client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=temperature,
+            seed=seed,
+        )
+        .choices[0]
+        .message.content
+    )
 
 
-
-    """
-
-    """
+def get_score(response: str):
+    """Get just the score from the response string
+    TODO: use json response mode instead of this
 
-
-
-
-    model_name = None
-
-    def __init__(self, *args, **kwargs):
-        if "OPENAI_API_KEY" in os.environ:
-            self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-            self.model_name = os.getenv("VM_OPENAI_MODEL", "gpt-3.5-turbo")
-
-        elif "AZURE_OPENAI_KEY" in os.environ:
-            if "AZURE_OPENAI_ENDPOINT" not in os.environ:
-                raise ValueError(
-                    "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
-                )
-
-            if "AZURE_OPENAI_MODEL" not in os.environ:
-                raise ValueError(
-                    "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
-                )
-
-            self.client = AzureOpenAI(
-                azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
-                api_key=os.getenv("AZURE_OPENAI_KEY"),
-                api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
-            )
-            self.model_name = os.getenv("AZURE_OPENAI_MODEL")
-
-        else:
-            raise ValueError(
-                "OPENAI_API_KEY or AZURE_OPENAI_KEY must be set to run LLM tests"
-            )
-
-    def call_model(self, user_prompt: str, system_prompt: str = None):
-        """
-        Call an LLM with the passed prompts and return the response. We're using GPT4 for now.
-        """
-        return (
-            self.client.chat.completions.create(
-                model=self.model_name,
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt},
-                ],
-                temperature=0.0,
-                seed=42,
-            )
-            .choices[0]
-            .message.content
-        )
-
-    def get_score(self, response: str):
-        """
-        Get just the numeric data in the response string and convert it to an int
+    e.g. "Score: 8\nExplanation: <some-explanation>" -> 8
+    """
+    score = re.search(r"Score: (\d+)", response)
 
-
-        ""
-        score = re.search(r"Score: (\d+)", response)
+    if not score:
+        raise ValueError("Could not find score in response")
 
-
-        raise ValueError("Could not find score in response")
+    return int(score.group(1))
 
-        return int(score.group(1))
 
-
-
+def get_explanation(response: str):
+    """Get just the explanation from the response string
+    TODO: use json response mode instead of this
 
-
-
+    e.g. "Score: 8\nExplanation: <some-explanation>" -> "<some-explanation>"
+    """
+    explanation = re.search(r"Explanation: (.+)", response, re.DOTALL)
 
-
-
+    if not explanation:
+        raise ValueError("Could not find explanation in response")
 
-
+    return explanation.group(1).strip().strip("`")
validmind/unit_metrics/composite.py
CHANGED
@@ -6,9 +6,10 @@ from dataclasses import dataclass
 from typing import List, Tuple, Union
 from uuid import uuid4
 
+from ..ai.test_descriptions import get_description_metadata
 from ..logging import get_logger
 from ..tests.decorator import _inspect_signature
-from ..utils import
+from ..utils import run_async, test_id_to_name
 from ..vm_models.test.metric import Metric
 from ..vm_models.test.metric_result import MetricResult
 from ..vm_models.test.result_summary import ResultSummary, ResultTable
validmind/utils.py
CHANGED
@@ -6,7 +6,6 @@ import asyncio
 import difflib
 import json
 import math
-import os
 import re
 import sys
 from platform import python_version
@@ -26,11 +25,8 @@ from matplotlib.axes._axes import _log as matplotlib_axes_logger
 from numpy import ndarray
 from tabulate import tabulate
 
-from .ai import background_generate_description, is_configured
 from .html_templates.content_blocks import math_jax_snippet, python_syntax_highlighting
-
-AI_REVISION_NAME = "Generated by ValidMind AI"
-DEFAULT_REVISION_NAME = "Default Description"
+from .logging import get_logger
 
 DEFAULT_BIG_NUMBER_DECIMALS = 2
 DEFAULT_SMALL_NUMBER_DECIMALS = 4
@@ -53,6 +49,8 @@ params = {
 pylab.rcParams.update(params)
 #################################
 
+logger = get_logger(__name__)
+
 
 def is_notebook() -> bool:
     """
@@ -310,7 +308,7 @@ def run_async_check(func, *args, **kwargs):
             if task.get_name() == name:
                 return task
 
-        return run_async(func, name=name, *args, **kwargs)
+        return run_async(func, name=name, *args, **kwargs)  # noqa B026
 
     except RuntimeError:
         pass
@@ -460,60 +458,3 @@ def md_to_html(md: str, mathml=False) -> str:
     )
 
     return html
-
-
-def get_description_metadata(
-    test_id,
-    default_description,
-    summary=None,
-    figures=None,
-    prefix="metric_description",
-):
-    """Get Metadata Dictionary for a Test or Metric Result
-
-    Generates an LLM interpretation of the test results or uses the default
-    description and returns a metadata object that can be logged with the test results.
-
-    By default, the description is generated by an LLM that will interpret the test
-    results and provide a human-readable description. If the summary or figures are
-    not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
-    set to `0` or `false` or no LLM has been configured, the default description will
-    be used as the test result description.
-
-    Note: Either the summary or figures must be provided to generate the description.
-
-    Args:
-        test_id (str): The test ID
-        default_description (str): The default description for the test
-        summary (Any): The test summary or results to interpret
-        figures (List[Figure]): The figures to attach to the test suite result
-        prefix (str): The prefix to use for the content ID (Default: "metric_description")
-
-    Returns:
-        dict: The metadata object to be logged with the test results
-    """
-    env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
-        "0",
-        "false",
-    ]
-
-    if (summary or figures) and not env_disabled and is_configured():
-        revision_name = AI_REVISION_NAME
-
-        # get description future and set it as the description in the metadata
-        # this will lazily retrieved so it can run in the background in parallel
-        description = background_generate_description(
-            test_id=test_id,
-            test_description=default_description,
-            test_summary=summary,
-            figures=figures,
-        )
-
-    else:
-        revision_name = DEFAULT_REVISION_NAME
-        description = md_to_html(default_description, mathml=True)
-
-    return {
-        "content_id": f"{prefix}:{test_id}::{revision_name}",
-        "text": description,
-    }
validmind/vm_models/dataset/dataset.py
CHANGED
@@ -195,7 +195,19 @@ class VMDataset:
         probability_column: str = None,
         probability_values: list = None,
         prediction_probabilities: list = None,  # DEPRECATED: use probability_values
+        **kwargs,
     ):
+        """Assign predictions and probabilities to the dataset.
+
+        Args:
+            model (VMModel): The model used to generate the predictions.
+            prediction_column (str, optional): The name of the column containing the predictions. Defaults to None.
+            prediction_values (list, optional): The values of the predictions. Defaults to None.
+            probability_column (str, optional): The name of the column containing the probabilities. Defaults to None.
+            probability_values (list, optional): The values of the probabilities. Defaults to None.
+            prediction_probabilities (list, optional): DEPRECATED: The values of the probabilities. Defaults to None.
+            kwargs: Additional keyword arguments that will get passed through to the model's `predict` method.
+        """
         if prediction_probabilities is not None:
             warnings.warn(
                 "The `prediction_probabilities` argument is deprecated. Use `probability_values` instead.",
@@ -226,7 +238,9 @@ class VMDataset:
 
         if prediction_values is None:
             X = self.df if isinstance(model, (FunctionModel, PipelineModel)) else self.x
-            probability_values, prediction_values = compute_predictions(
+            probability_values, prediction_values = compute_predictions(
+                model, X, **kwargs
+            )
 
         prediction_column = prediction_column or f"{model.input_id}_prediction"
         self._add_column(prediction_column, prediction_values)
@@ -356,8 +370,8 @@ class VMDataset:
         return as_df(self.df[self.probability_column(model)])
 
     def target_classes(self):
-        """Returns the
-        return [str(i) for i in np.unique(self.y)]
+        """Returns the target class labels or unique values of the target column."""
+        return self.target_class_labels or [str(i) for i in np.unique(self.y)]
 
     def __str__(self):
         return (
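assign_predictions now accepts arbitrary keyword arguments and forwards them, via compute_predictions in the next file, to the model's predict() call. A sketch of the pass-through; the extra keyword shown is illustrative and only has to be accepted by your own predict function, and vm_dataset / vm_model are assumed to be previously initialized ValidMind inputs:

# Forwarded as vm_model.predict(X, batch_size=32) when prediction_values
# are not supplied directly (batch_size is a made-up example kwarg).
vm_dataset.assign_predictions(
    model=vm_model,
    batch_size=32,
)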
validmind/vm_models/dataset/utils.py
CHANGED
@@ -94,7 +94,7 @@ def _is_probabilties(output):
     return np.all((output >= 0) & (output <= 1)) and np.any((output > 0) & (output < 1))
 
 
-def compute_predictions(model, X) -> tuple:
+def compute_predictions(model, X, **kwargs) -> tuple:
     probability_values = None
 
     try:
@@ -108,7 +108,7 @@ def compute_predictions(model, X) -> tuple:
 
     try:
         logger.info("Running predict()... This may take a while")
-        prediction_values = model.predict(X)
+        prediction_values = model.predict(X, **kwargs)
         logger.info("Done running predict()")
     except MissingOrInvalidModelPredictFnError:
         raise MissingOrInvalidModelPredictFnError(
validmind/vm_models/test/metric.py
CHANGED
@@ -12,8 +12,8 @@ from typing import ClassVar, List, Optional, Union
 
 import pandas as pd
 
+from ...ai.test_descriptions import get_description_metadata
 from ...errors import MissingCacheResultsArgumentsError
-from ...utils import get_description_metadata
 from ..figure import Figure
 from .metric_result import MetricResult
 from .result_wrapper import MetricResultWrapper
@@ -36,13 +36,6 @@ class Metric(Test):
     # Instance Variables
     result: MetricResultWrapper = None  # populated by cache_results() method
 
-    @property
-    def key(self):
-        """
-        Keep the key for compatibility reasons
-        """
-        return self._key if hasattr(self, "_key") else self.name
-
     @abstractmethod
     def summary(self, metric_value: Optional[Union[dict, list, pd.DataFrame]] = None):
         """
validmind/vm_models/test/result_wrapper.py
CHANGED
@@ -15,10 +15,10 @@ import pandas as pd
 from ipywidgets import HTML, GridBox, Layout, VBox
 
 from ... import api_client
-from ...ai import DescriptionFuture
+from ...ai.test_descriptions import AI_REVISION_NAME, DescriptionFuture
 from ...input_registry import input_registry
 from ...logging import get_logger
-from ...utils import
+from ...utils import NumpyEncoder, display, run_async, test_id_to_name
 from ..dataset import VMDataset
 from ..figure import Figure
 from .metric_result import MetricResult
validmind/vm_models/test/test.py
CHANGED
@@ -52,6 +52,9 @@ class Test(TestUtils):
                 "test_id is missing. It must be passed when initializing the test"
             )
         self._ref_id = str(uuid4())
+        self.key = (
+            self.test_id
+        )  # for backwards compatibility - figures really should get keyed automatically
 
         # TODO: add validation for required inputs
         if self.default_params is None:
validmind/vm_models/test/threshold_test.py
CHANGED
@@ -11,7 +11,7 @@ avoid confusion with the "tests" in the general data science/modeling sense.
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
 
-from ...
+from ...ai.test_descriptions import get_description_metadata
 from ..figure import Figure
 from .result_summary import ResultSummary, ResultTable
 from .result_wrapper import ThresholdTestResultWrapper
validmind/vm_models/test_suite/runner.py
CHANGED
@@ -83,11 +83,14 @@ class TestSuiteRunner:
                 test_configs = test_configs.get("params", {})
             else:
                 if (test_configs) and ("params" not in test_configs):
-
-
-                    "Setting test parameters directly in the 'config' parameter
-
+                    # [DEPRECATED] This is the old way of setting test parameters
+                    msg = (
+                        "Setting test parameters directly in the 'config' parameter"
+                        " of the run_documentation_tests() method is deprecated. "
+                        "Instead, use the new format of the config: "
+                        'config = {"test_id": {"params": {...}, "inputs": {...}}}'
                     )
+                    logger.warning(msg)
 
             test.load(inputs=inputs, context=self.context, config=test_configs)
 
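The warning added above spells out the new config shape: each test ID maps to its own "params" and "inputs" rather than parameters being set directly on the config entry. A sketch of the new format; the test ID, parameter name, and input ID below are placeholders rather than values taken from this diff:

import validmind as vm

config = {
    "validmind.data_validation.HighPearsonCorrelation": {
        "params": {"max_threshold": 0.5},       # placeholder parameter
        "inputs": {"dataset": "raw_dataset"},   # placeholder dataset input_id
    },
}

vm.run_documentation_tests(config=config)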