validmind 2.8.22__py3-none-any.whl → 2.8.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +3 -0
- validmind/__version__.py +1 -1
- validmind/ai/utils.py +89 -0
- validmind/api_client.py +4 -0
- validmind/experimental/__init__.py +0 -0
- validmind/experimental/agents.py +65 -0
- validmind/tests/data_validation/MutualInformation.py +14 -2
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +5 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +3 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +6 -4
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -1
- validmind/tests/model_validation/ragas/utils.py +4 -24
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -1
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +13 -0
- validmind/tests/prompt_validation/Bias.py +2 -1
- validmind/tests/prompt_validation/Clarity.py +2 -1
- validmind/tests/prompt_validation/Conciseness.py +2 -1
- validmind/tests/prompt_validation/Delimitation.py +2 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -1
- validmind/tests/prompt_validation/Robustness.py +3 -2
- validmind/tests/prompt_validation/Specificity.py +2 -1
- validmind/tests/prompt_validation/ai_powered_test.py +18 -17
- validmind/vm_models/result/__init__.py +16 -2
- validmind/vm_models/result/result.py +127 -14
- {validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/METADATA +4 -3
- {validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/RECORD +35 -33
- {validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/WHEEL +1 -1
- {validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/LICENSE +0 -0
- {validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/entry_points.txt +0 -0
validmind/__init__.py
CHANGED
@@ -53,6 +53,7 @@ from .client import ( # noqa: E402
     run_documentation_tests,
     run_test_suite,
 )
+from .experimental import agents as experimental_agent
 from .tests.decorator import tags, tasks, test
 from .tests.run import print_env
 from .utils import is_notebook, parse_version
@@ -126,4 +127,6 @@ __all__ = [ # noqa
     "unit_metrics",
     "test_suites",
     "log_text",
+    # experimental features
+    "experimental_agent",
 ]
validmind/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "2.8.
+__version__ = "2.8.25"
validmind/ai/utils.py
CHANGED
@@ -15,6 +15,10 @@ logger = get_logger(__name__)
 
 __client = None
 __model = None
+__judge_llm = None
+__judge_embeddings = None
+EMBEDDINGS_MODEL = "text-embedding-3-small"
+
 # can be None, True or False (ternary to represent initial state, ack and failed ack)
 __ack = None
 
@@ -105,6 +109,91 @@ def get_client_and_model():
     return __client, __model
 
 
+def get_judge_config(judge_llm=None, judge_embeddings=None):
+    try:
+        from langchain_core.embeddings import Embeddings
+        from langchain_core.language_models.chat_models import BaseChatModel
+        from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+
+        from validmind.models.function import FunctionModel
+    except ImportError:
+        raise ImportError("Please run `pip install validmind[llm]` to use LLM tests")
+
+    if judge_llm is not None or judge_embeddings is not None:
+        if isinstance(judge_llm, FunctionModel) and judge_llm is not None:
+            if isinstance(judge_llm.model, BaseChatModel):
+                judge_llm = judge_llm.model
+            else:
+                raise ValueError(
+                    "The ValidMind Functional model provided does not have have a langchain compatible LLM as a model attribute."
+                    "To use default ValidMind LLM, do not set judge_llm/judge_embedding parameter, "
+                    "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
+                )
+        if isinstance(judge_embeddings, FunctionModel) and judge_embeddings is not None:
+            if isinstance(judge_llm.model, BaseChatModel):
+                judge_embeddings = judge_embeddings.model
+            else:
+                raise ValueError(
+                    "The ValidMind Functional model provided does not have have a langchain compatible embeddings model as a model attribute."
+                    "To use default ValidMind LLM, do not set judge_embedding parameter, "
+                    "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
+                )
+
+        if (isinstance(judge_llm, BaseChatModel) or judge_llm is None) and (
+            isinstance(judge_embeddings, Embeddings) or judge_embeddings is None
+        ):
+            return judge_llm, judge_embeddings
+        else:
+            raise ValueError(
+                "Provided Judge LLM/Embeddings are not Langchain compatible. Ensure the judge LLM/embedding provided are an instance of "
+                "Langchain BaseChatModel and LangchainEmbeddings. To use default ValidMind LLM, do not set judge_llm/judge_embedding parameter, "
+                "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
+            )
+
+    # grab default values if not passed at run time
+    global __judge_llm, __judge_embeddings
+    if __judge_llm and __judge_embeddings:
+        return __judge_llm, __judge_embeddings
+
+    client, model = get_client_and_model()
+    os.environ["OPENAI_API_BASE"] = str(client.base_url)
+
+    __judge_llm = ChatOpenAI(api_key=client.api_key, model=model)
+    __judge_embeddings = OpenAIEmbeddings(
+        api_key=client.api_key, model=EMBEDDINGS_MODEL
+    )
+
+    return __judge_llm, __judge_embeddings
+
+
+def set_judge_config(judge_llm, judge_embeddings):
+    global __judge_llm, __judge_embeddings
+    try:
+        from langchain_core.embeddings import Embeddings
+        from langchain_core.language_models.chat_models import BaseChatModel
+
+        from validmind.models.function import FunctionModel
+    except ImportError:
+        raise ImportError("Please run `pip install validmind[llm]` to use LLM tests")
+    if isinstance(judge_llm, BaseChatModel) and isinstance(
+        judge_embeddings, Embeddings
+    ):
+        __judge_llm = judge_llm
+        __judge_embeddings = judge_embeddings
+    # Assuming 'your_object' is the object you want to check
+    elif isinstance(judge_llm, FunctionModel) and isinstance(
+        judge_embeddings, FunctionModel
+    ):
+        __judge_llm = judge_llm.model
+        __judge_embeddings = judge_embeddings.model
+    else:
+        raise ValueError(
+            "Provided Judge LLM/Embeddings are not Langchain compatible. Ensure the judge LLM/embedding provided are an instance of "
+            "Langchain BaseChatModel and LangchainEmbeddings. To use default ValidMind LLM, do not set judge_llm/judge_embedding parameter, "
+            "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
+        )
+
+
 def is_configured():
     global __ack
 
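The two helpers added above let callers supply their own judge LLM and embeddings for AI-powered tests instead of the ValidMind-hosted defaults. A minimal sketch of how they might be used, assuming the `validmind[llm]` extra is installed so `langchain_openai` is importable; the model names are placeholders:

```python
# Sketch only: assumes `pip install validmind[llm]` (langchain-openai available)
# and valid OpenAI credentials in the environment.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from validmind.ai.utils import get_judge_config, set_judge_config

# Register a custom judge once; subsequent calls to get_judge_config() without
# arguments return these instead of the ValidMind-hosted defaults.
set_judge_config(
    judge_llm=ChatOpenAI(model="gpt-4o-mini"),  # placeholder model name
    judge_embeddings=OpenAIEmbeddings(model="text-embedding-3-small"),
)

judge_llm, judge_embeddings = get_judge_config()
```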
validmind/api_client.py
CHANGED
@@ -448,6 +448,7 @@ async def alog_metric(
     params: Optional[Dict[str, Any]] = None,
     recorded_at: Optional[str] = None,
     thresholds: Optional[Dict[str, Any]] = None,
+    passed: Optional[bool] = None,
 ):
     """See log_metric for details."""
     if not key or not isinstance(key, str):
@@ -476,6 +477,7 @@ async def alog_metric(
             "params": params or {},
             "recorded_at": recorded_at,
             "thresholds": thresholds or {},
+            "passed": passed if passed is not None else None,
         },
         cls=NumpyEncoder,
         allow_nan=False,
@@ -493,6 +495,7 @@ def log_metric(
     params: Optional[Dict[str, Any]] = None,
     recorded_at: Optional[str] = None,
     thresholds: Optional[Dict[str, Any]] = None,
+    passed: Optional[bool] = None,
 ):
     """Logs a unit metric.
 
@@ -518,6 +521,7 @@ def log_metric(
         params=params,
         recorded_at=recorded_at,
         thresholds=thresholds,
+        passed=passed,
     )
 
 
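The new `passed` flag lets a logged unit metric carry an explicit pass/fail verdict alongside its value and thresholds. A hedged sketch of the call, assuming an initialized ValidMind session; the metric key, value, and threshold are illustrative:

```python
from validmind.api_client import log_metric  # assumes vm.init(...) has already been called

auc = 0.83  # illustrative metric value
log_metric(
    key="auc_score",               # illustrative metric key
    value=auc,
    thresholds={"min_auc": 0.7},
    passed=auc >= 0.7,             # new in this release: explicit pass/fail flag
)
```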
validmind/experimental/__init__.py
File without changes
validmind/experimental/agents.py
ADDED
@@ -0,0 +1,65 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+"""
+Agent interface for all text generation tasks
+"""
+
+import requests
+
+from validmind.api_client import _get_api_headers, _get_url, raise_api_error
+from validmind.utils import is_html, md_to_html
+from validmind.vm_models.result import TextGenerationResult
+
+
+def run_task(
+    task: str,
+    input: dict,
+    show: bool = True,
+) -> TextGenerationResult:
+    """
+    Run text generation tasks using AI models.
+
+    Args:
+        task (str): Type of text generation task to run. Currently supports:
+            - 'code_explainer': Generates natural language explanations of code
+        input (dict): Input parameters for the generation task:
+            - For code_explainer: Must contain 'source_code' and optional parameters
+        show (bool): Whether to display the generated result. Defaults to True.
+
+    Returns:
+        TextGenerationResult: Result object containing the generated text and metadata
+
+    Raises:
+        ValueError: If an unsupported task is provided
+        requests.exceptions.RequestException: If the API request fails
+    """
+    if task == "code_explainer" or task == "qualitative_text_generation":
+        r = requests.post(
+            url=_get_url(f"ai/generate/{task}"),
+            headers=_get_api_headers(),
+            json=input,
+        )
+
+        if r.status_code != 200:
+            raise_api_error(r.text)
+
+        generated_text = r.json()["content"]
+    else:
+        raise ValueError(f"Unsupported task: {task}")
+
+    if not is_html(generated_text):
+        generated_text = md_to_html(generated_text, mathml=True)
+
+    # Create a test result with the generated text
+    result = TextGenerationResult(
+        result_type=f"{task}",
+        description=generated_text,
+        title=f"Text Generation: {task}",
+        doc=f"Generated {task}",
+    )
+    if show:
+        result.show()
+
+    return result
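Combined with the `experimental_agent` alias exported from `validmind/__init__.py`, the new module can be exercised roughly as below. This is a sketch only: it assumes an initialized ValidMind API connection, and the source snippet and content ID are illustrative.

```python
import validmind as vm  # assumes vm.init(...) has been called with valid credentials

result = vm.experimental_agent.run_task(
    task="code_explainer",
    input={"source_code": "def add(a, b):\n    return a + b"},  # illustrative input
    show=False,
)

# TextGenerationResult.log() stores the generated text under the given content ID
result.log(content_id="code_explainer_add_function")
```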
validmind/tests/data_validation/MutualInformation.py
CHANGED
@@ -68,8 +68,20 @@ def MutualInformation(
     if task not in ["classification", "regression"]:
         raise ValueError("task must be either 'classification' or 'regression'")
 
-
-
+    # Check if numeric features exist
+    if not dataset.feature_columns_numeric:
+        raise ValueError(
+            "No numeric features found in dataset. Mutual Information test requires numeric features."
+        )
+
+    # Check if target column exists
+    if not dataset.target_column:
+        raise ValueError(
+            "Target column is required for Mutual Information calculation but was not provided."
+        )
+
+    X = dataset._df[dataset.feature_columns_numeric]
+    y = dataset._df[dataset.target_column]
 
     # Select appropriate MI function based on task type
     if task == "classification":
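With the checks above, the test fails fast with a clear message when the dataset has no numeric features or no target column, instead of erroring further downstream. A sketch of invoking it through the standard test runner; the toy DataFrame and column names are illustrative, and an initialized ValidMind session is assumed:

```python
import pandas as pd
import validmind as vm
from validmind.tests import run_test

# assumes vm.init(...) has been called
df = pd.DataFrame({"x1": [0, 1, 2, 3], "x2": [0.1, 0.9, 0.4, 0.7], "y": [0, 1, 0, 1]})
vm_dataset = vm.init_dataset(dataset=df, target_column="y")  # numeric features + target present

result = run_test(
    "validmind.data_validation.MutualInformation",
    inputs={"dataset": vm_dataset},
    params={"task": "classification"},
)
```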
validmind/tests/model_validation/ragas/AnswerCorrectness.py
CHANGED
@@ -34,6 +34,8 @@ def AnswerCorrectness(
     user_input_column="user_input",
     response_column="response",
     reference_column="reference",
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Evaluates the correctness of answers in a dataset with respect to the provided ground
@@ -118,7 +120,9 @@ def AnswerCorrectness(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[answer_correctness()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "answer_correctness"
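All of the ragas-based tests in this release gain the same optional `judge_llm` / `judge_embeddings` parameters, forwarded to `get_ragas_config`. A sketch of overriding the judge for one of them; it assumes the `validmind[llm]` extra and an existing `vm_rag_dataset` with the expected `user_input`, `response`, and `reference` columns:

```python
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from validmind.tests import run_test

result = run_test(
    "validmind.model_validation.ragas.AnswerCorrectness",
    inputs={"dataset": vm_rag_dataset},  # assumed pre-built VMDataset
    params={
        "judge_llm": ChatOpenAI(model="gpt-4o-mini"),  # placeholder judge model
        "judge_embeddings": OpenAIEmbeddings(model="text-embedding-3-small"),
    },
)
```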
validmind/tests/model_validation/ragas/AspectCritic.py
CHANGED
@@ -51,6 +51,8 @@ def AspectCritic(
         "maliciousness",
     ],
     additional_aspects: list = None,
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Evaluates generations against the following aspects: harmfulness, maliciousness,
@@ -158,7 +160,9 @@ def AspectCritic(
     all_aspects = [built_in_aspects[aspect] for aspect in aspects] + custom_aspects
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=all_aspects,
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     # reverse the score for aspects where lower is better
validmind/tests/model_validation/ragas/ContextEntityRecall.py
CHANGED
@@ -33,6 +33,8 @@ def ContextEntityRecall(
     dataset,
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Evaluates the context entity recall for dataset entries and visualizes the results.
@@ -113,7 +115,9 @@ def ContextEntityRecall(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[context_entity_recall()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "context_entity_recall"
validmind/tests/model_validation/ragas/ContextPrecision.py
CHANGED
@@ -34,6 +34,8 @@ def ContextPrecision(
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
+    judge_llm=None,
+    judge_embeddings=None,
 ):  # noqa: B950
     """
     Context Precision is a metric that evaluates whether all of the ground-truth
@@ -109,7 +111,9 @@ def ContextPrecision(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[context_precision()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "llm_context_precision_with_reference"
validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py
CHANGED
@@ -34,6 +34,8 @@ def ContextPrecisionWithoutReference(
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     response_column: str = "response",
+    judge_llm=None,
+    judge_embeddings=None,
 ):  # noqa: B950
     """
     Context Precision Without Reference is a metric used to evaluate the relevance of
@@ -104,7 +106,9 @@ def ContextPrecisionWithoutReference(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[context_precision()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "llm_context_precision_without_reference"
validmind/tests/model_validation/ragas/ContextRecall.py
CHANGED
@@ -34,6 +34,8 @@ def ContextRecall(
     user_input_column: str = "user_input",
     retrieved_contexts_column: str = "retrieved_contexts",
     reference_column: str = "reference",
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Context recall measures the extent to which the retrieved context aligns with the
@@ -109,7 +111,9 @@ def ContextRecall(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[context_recall()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "context_recall"
validmind/tests/model_validation/ragas/Faithfulness.py
CHANGED
@@ -34,6 +34,8 @@ def Faithfulness(
     user_input_column="user_input",
     response_column="response",
     retrieved_contexts_column="retrieved_contexts",
+    judge_llm=None,
+    judge_embeddings=None,
 ):  # noqa
     """
     Evaluates the faithfulness of the generated answers with respect to retrieved contexts.
@@ -114,7 +116,9 @@ def Faithfulness(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[faithfulness()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "faithfulness"
validmind/tests/model_validation/ragas/NoiseSensitivity.py
CHANGED
@@ -38,6 +38,8 @@ def NoiseSensitivity(
     reference_column="reference",
     focus="relevant",
     user_input_column="user_input",
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Assesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it
@@ -149,7 +151,7 @@ def NoiseSensitivity(
     result_df = evaluate(
         Dataset.from_pandas(df),
         metrics=[noise_sensitivity(focus=focus)],
-        **get_ragas_config(),
+        **get_ragas_config(judge_llm, judge_embeddings),
     ).to_pandas()
 
     score_column = f"noise_sensitivity_{focus}"
validmind/tests/model_validation/ragas/ResponseRelevancy.py
CHANGED
@@ -34,6 +34,8 @@ def ResponseRelevancy(
     user_input_column="user_input",
     retrieved_contexts_column=None,
     response_column="response",
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Assesses how pertinent the generated answer is to the given prompt.
@@ -44,8 +46,8 @@ def ResponseRelevancy(
     relevancy. This metric is computed using the `user_input`, the `retrieved_contexts`
     and the `response`.
 
-    The Response Relevancy is defined as the mean cosine
-    `user_input` to a number of
+    The Response Relevancy is defined as the mean cosine similarity of the original
+    `user_input` to a number of artificial questions, which are generated (reverse-engineered)
     based on the `response`:
 
     $$
@@ -62,7 +64,7 @@ def ResponseRelevancy(
 
     **Note**: *This is a reference-free metric, meaning that it does not require a
     `ground_truth` answer to compare against. A similar metric that does evaluate the
-    correctness of a generated
+    correctness of a generated answers with respect to a `ground_truth` answer is
     `validmind.model_validation.ragas.AnswerCorrectness`.*
 
     ### Configuring Columns
@@ -128,7 +130,7 @@ def ResponseRelevancy(
     result_df = evaluate(
         Dataset.from_pandas(df),
         metrics=metrics,
-        **get_ragas_config(),
+        **get_ragas_config(judge_llm, judge_embeddings),
     ).to_pandas()
 
     score_column = "answer_relevancy"
validmind/tests/model_validation/ragas/SemanticSimilarity.py
CHANGED
@@ -33,6 +33,8 @@ def SemanticSimilarity(
     dataset,
     response_column="response",
     reference_column="reference",
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """
     Calculates the semantic similarity between generated responses and ground truths
@@ -107,7 +109,9 @@ def SemanticSimilarity(
     df = get_renamed_columns(dataset._df, required_columns)
 
     result_df = evaluate(
-        Dataset.from_pandas(df),
+        Dataset.from_pandas(df),
+        metrics=[semantic_similarity()],
+        **get_ragas_config(judge_llm, judge_embeddings)
     ).to_pandas()
 
     score_column = "semantic_similarity"
validmind/tests/model_validation/ragas/utils.py
CHANGED
@@ -2,34 +2,14 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-import
-
-from validmind.ai.utils import get_client_and_model, is_configured
+from validmind.ai.utils import get_judge_config
 
 EMBEDDINGS_MODEL = "text-embedding-3-small"
 
 
-def get_ragas_config():
-
-
-        from langchain_openai import ChatOpenAI, OpenAIEmbeddings
-    except ImportError:
-        raise ImportError("Please run `pip install validmind[llm]` to use LLM tests")
-
-    if not is_configured():
-        raise ValueError(
-            "LLM is not configured. Please set an `OPENAI_API_KEY` environment variable "
-            "or ensure that you are connected to the ValidMind API and ValidMind AI is "
-            "enabled for your account."
-        )
-
-    client, model = get_client_and_model()
-    os.environ["OPENAI_API_BASE"] = str(client.base_url)
-
-    return {
-        "llm": ChatOpenAI(api_key=client.api_key, model=model),
-        "embeddings": OpenAIEmbeddings(api_key=client.api_key, model=EMBEDDINGS_MODEL),
-    }
+def get_ragas_config(judge_llm=None, judge_embeddings=None):
+    judge_llm, judge_embeddings = get_judge_config(judge_llm, judge_embeddings)
+    return {"llm": judge_llm, "embeddings": judge_embeddings}
 
 
 def make_sub_col_udf(root_col, sub_col):
validmind/tests/model_validation/sklearn/OverfitDiagnosis.py
CHANGED
@@ -220,6 +220,16 @@ def OverfitDiagnosis(
     - May not capture more subtle forms of overfitting that do not exceed the threshold.
     - Assumes that the binning of features adequately represents the data segments.
     """
+
+    numeric_and_categorical_feature_columns = (
+        datasets[0].feature_columns_numeric + datasets[0].feature_columns_categorical
+    )
+
+    if not numeric_and_categorical_feature_columns:
+        raise ValueError(
+            "No valid numeric or categorical columns found in features_columns"
+        )
+
     is_classification = bool(datasets[0].probability_column(model))
 
     if not metric:
@@ -246,7 +256,7 @@ def OverfitDiagnosis(
     figures = []
     results_headers = ["slice", "shape", "feature", metric]
 
-    for feature_column in
+    for feature_column in numeric_and_categorical_feature_columns:
         bins = 10
         if feature_column in datasets[0].feature_columns_categorical:
            bins = len(train_df[feature_column].unique())
validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py
CHANGED
@@ -211,6 +211,19 @@ def WeakspotsDiagnosis(
     improvement.
     """
     feature_columns = features_columns or datasets[0].feature_columns
+    numeric_and_categorical_columns = (
+        datasets[0].feature_columns_numeric + datasets[0].feature_columns_categorical
+    )
+
+    feature_columns = [
+        col for col in feature_columns if col in numeric_and_categorical_columns
+    ]
+
+    if not feature_columns:
+        raise ValueError(
+            "No valid numeric or categorical columns found in features_columns"
+        )
+
     if not all(col in datasets[0].feature_columns for col in feature_columns):
         raise ValueError(
             "Column(s) provided in features_columns do not exist in the dataset"
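Both diagnosis tests now restrict themselves to numeric and categorical feature columns and raise early when none remain. A sketch of running one of them; `vm_train_ds`, `vm_test_ds`, and `vm_model` are assumed to come from `vm.init_dataset` / `vm.init_model`, and the column names are illustrative:

```python
from validmind.tests import run_test

result = run_test(
    "validmind.model_validation.sklearn.WeakspotsDiagnosis",
    inputs={"datasets": [vm_train_ds, vm_test_ds], "model": vm_model},  # assumed inputs
    params={"features_columns": ["age", "income"]},  # only numeric/categorical columns are kept
)
```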
validmind/tests/prompt_validation/Bias.py
CHANGED
@@ -45,7 +45,7 @@ Prompt:
 
 @tags("llm", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Bias(model, min_threshold=7):
+def Bias(model, min_threshold=7, judge_llm=None):
     """
     Assesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the
     prompt.
@@ -100,6 +100,7 @@ def Bias(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     )
 
     score = get_score(response)
validmind/tests/prompt_validation/Clarity.py
CHANGED
@@ -46,7 +46,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Clarity(model, min_threshold=7):
+def Clarity(model, min_threshold=7, judge_llm=None):
     """
     Evaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines.
 
@@ -89,6 +89,7 @@ def Clarity(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     )
 
     score = get_score(response)
validmind/tests/prompt_validation/Conciseness.py
CHANGED
@@ -54,7 +54,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Conciseness(model, min_threshold=7):
+def Conciseness(model, min_threshold=7, judge_llm=None):
     """
     Analyzes and grades the conciseness of prompts provided to a Large Language Model.
 
@@ -97,6 +97,7 @@ def Conciseness(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     )
     score = get_score(response)
     explanation = get_explanation(response)
validmind/tests/prompt_validation/Delimitation.py
CHANGED
@@ -39,7 +39,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Delimitation(model, min_threshold=7):
+def Delimitation(model, min_threshold=7, judge_llm=None):
     """
     Evaluates the proper use of delimiters in prompts provided to Large Language Models.
 
@@ -83,6 +83,7 @@ def Delimitation(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     )
     score = get_score(response)
     explanation = get_explanation(response)
validmind/tests/prompt_validation/NegativeInstruction.py
CHANGED
@@ -52,7 +52,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def NegativeInstruction(model, min_threshold=7):
+def NegativeInstruction(model, min_threshold=7, judge_llm=None):
     """
     Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
 
@@ -101,6 +101,7 @@ def NegativeInstruction(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     )
     score = get_score(response)
     explanation = get_explanation(response)
validmind/tests/prompt_validation/Robustness.py
CHANGED
@@ -25,7 +25,7 @@ Contradictions, edge cases, typos, bad phrasing, distracting, complex or out-of-
 Be creative and think step-by-step how you would break the prompt.
 Then generate {num_tests} inputs for the user-submitted prompt template that would break the prompt.
 Each input should be different from the others.
-Each input should be
+Each input should be returned as a new line in your response.
 Respond only with the values to be inserted into the prompt template and do not include quotes, explanations or any extra text.
 
 Example:
@@ -56,7 +56,7 @@ Input:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Robustness(model, dataset, num_tests=10):
+def Robustness(model, dataset, num_tests=10, judge_llm=None):
     """
     Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test
     specifically measures the model's ability to generate correct classifications with the given prompt even when the
@@ -112,6 +112,7 @@ def Robustness(model, dataset, num_tests=10):
     generated_inputs = call_model(
         system_prompt=SYSTEM.format(num_tests=num_tests),
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     ).split("\n")
 
     responses = model.predict(
validmind/tests/prompt_validation/Specificity.py
CHANGED
@@ -52,7 +52,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Specificity(model, min_threshold=7):
+def Specificity(model, min_threshold=7, judge_llm=None):
     """
     Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail,
     and relevance.
@@ -97,6 +97,7 @@ def Specificity(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
     )
     score = get_score(response)
     explanation = get_explanation(response)
validmind/tests/prompt_validation/ai_powered_test.py
CHANGED
@@ -4,7 +4,7 @@
 
 import re
 
-from validmind.ai.utils import
+from validmind.ai.utils import get_judge_config, is_configured
 
 missing_prompt_message = """
 Cannot run prompt validation tests on a model with no prompt.
@@ -21,7 +21,12 @@ my_vm_model = vm.init_model(
 
 
 def call_model(
-    system_prompt: str,
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float = 0.0,
+    seed: int = 42,
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """Call LLM with the given prompts and return the response"""
     if not is_configured():
@@ -31,21 +36,17 @@ def call_model(
             "enabled for your account."
         )
 
-
-
-
-
-
-
-
-
-
-
-
-        )
-        .choices[0]
-        .message.content
-    )
+    judge_llm, judge_embeddings = get_judge_config(judge_llm, judge_embeddings)
+    messages = [
+        ("system", system_prompt.strip("\n").strip()),
+        ("user", user_prompt.strip("\n").strip()),
+    ]
+
+    return judge_llm.invoke(
+        messages,
+        temperature=temperature,
+        seed=seed,
+    ).content
 
 
 def get_score(response: str):
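Every prompt validation test now threads an optional `judge_llm` through `call_model`, which resolves it via `get_judge_config` and falls back to the ValidMind-hosted judge when it is omitted. A sketch of supplying a custom judge; it assumes the `validmind[llm]` extra and a `vm_model` initialized with a prompt:

```python
from langchain_openai import ChatOpenAI

from validmind.tests import run_test

result = run_test(
    "validmind.prompt_validation.Clarity",
    inputs={"model": vm_model},  # assumed VMModel created via vm.init_model(..., prompt=...)
    params={
        "min_threshold": 7,
        "judge_llm": ChatOpenAI(model="gpt-4o-mini"),  # placeholder judge model
    },
)
```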
validmind/vm_models/result/__init__.py
CHANGED
@@ -2,6 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from .result import
+from .result import (
+    ErrorResult,
+    RawData,
+    Result,
+    ResultTable,
+    TestResult,
+    TextGenerationResult,
+)
 
-__all__ = [
+__all__ = [
+    "ErrorResult",
+    "RawData",
+    "Result",
+    "ResultTable",
+    "TestResult",
+    "TextGenerationResult",
+]
validmind/vm_models/result/result.py
CHANGED
@@ -129,6 +129,7 @@ class Result:
 
     result_id: str = None
     name: str = None
+    result_type: str = None
 
     def __str__(self) -> str:
         """May be overridden by subclasses."""
@@ -445,6 +446,7 @@ class TestResult(Result):
     async def log_async(
         self,
         section_id: str = None,
+        content_id: str = None,
         position: int = None,
         config: Dict[str, bool] = None,
     ):
@@ -464,7 +466,7 @@ class TestResult(Result):
             )
         )
 
-        if self.tables
+        if self.tables:
             tasks.append(
                 api_client.alog_test_result(
                     result=self.serialize(),
@@ -473,30 +475,32 @@ class TestResult(Result):
                     config=config,
                 )
             )
-
+        if self.figures:
             tasks.extend(
                 [api_client.alog_figure(figure) for figure in (self.figures or [])]
             )
+        if self.description:
+            revision_name = (
+                AI_REVISION_NAME
+                if self._was_description_generated
+                else DEFAULT_REVISION_NAME
+            )
 
-
-
-
-        if
-        else
-
-
-        tasks.append(
-            update_metadata(
-                content_id=f"test_description:{self.result_id}::{revision_name}",
-                text=self.description,
-            )
+            tasks.append(
+                update_metadata(
+                    content_id=f"{content_id}::{revision_name}"
+                    if content_id
+                    else f"test_description:{self.result_id}::{revision_name}",
+                    text=self.description,
                 )
+            )
 
         return await asyncio.gather(*tasks)
 
     def log(
         self,
         section_id: str = None,
+        content_id: str = None,
         position: int = None,
         unsafe: bool = False,
         config: Dict[str, bool] = None,
@@ -506,6 +510,7 @@ class TestResult(Result):
         Args:
             section_id (str): The section ID within the model document to insert the
                 test result.
+            content_id (str): The content ID to log the result to.
             position (int): The position (index) within the section to insert the test
                 result.
             unsafe (bool): If True, log the result even if it contains sensitive data
@@ -533,6 +538,7 @@ class TestResult(Result):
         run_async(
             self.log_async,
             section_id=section_id,
+            content_id=content_id,
            position=position,
            config=config,
        )
@@ -568,3 +574,110 @@ class TestResult(Result):
         raise InvalidParameterError(
             f"Values for config keys must be boolean. Non-boolean values found for keys: {', '.join(non_bool_keys)}"
         )
+
+
+@dataclass
+class TextGenerationResult(Result):
+    """Test result."""
+
+    name: str = "Text Generation Result"
+    ref_id: str = None
+    title: Optional[str] = None
+    doc: Optional[str] = None
+    description: Optional[Union[str, DescriptionFuture]] = None
+    params: Optional[Dict[str, Any]] = None
+    metadata: Optional[Dict[str, Any]] = None
+    _was_description_generated: bool = False
+
+    def __post_init__(self):
+        if self.ref_id is None:
+            self.ref_id = str(uuid4())
+
+    def __repr__(self) -> str:
+        attrs = [
+            attr
+            for attr in [
+                "doc",
+                "description",
+                "params",
+            ]
+            if getattr(self, attr) is not None
+            and (
+                len(getattr(self, attr)) > 0
+                if isinstance(getattr(self, attr), list)
+                else True
+            )
+        ]
+
+        return f'TextGenerationResult("{self.result_id}", {", ".join(attrs)})'
+
+    def __getattribute__(self, name):
+        # lazy load description if its a DescriptionFuture (generated in background)
+        if name == "description":
+            description = super().__getattribute__("description")
+
+            if isinstance(description, DescriptionFuture):
+                self._was_description_generated = True
+                self.description = description.get_description()
+
+        return super().__getattribute__(name)
+
+    @property
+    def test_name(self) -> str:
+        """Get the test name, using custom title if available."""
+        return self.title or test_id_to_name(self.result_id)
+
+    def to_widget(self):
+        template_data = {
+            "test_name": self.test_name,
+            "description": self.description.replace("h3", "strong"),
+            "params": (
+                json.dumps(self.params, cls=NumpyEncoder, indent=2)
+                if self.params
+                else None
+            ),
+        }
+        rendered = get_result_template().render(**template_data)
+
+        widgets = [HTML(rendered)]
+
+        return VBox(widgets)
+
+    def serialize(self):
+        """Serialize the result for the API."""
+        return {
+            "test_name": self.result_id,
+            "title": self.title,
+            "ref_id": self.ref_id,
+            "params": self.params,
+            "metadata": self.metadata,
+        }
+
+    async def log_async(
+        self,
+        content_id: str = None,
+    ):
+        return await asyncio.gather(
+            update_metadata(
+                content_id=f"{content_id}",
+                text=self.description,
+            )
+        )
+
+    def log(
+        self,
+        content_id: str = None,
+    ):
+        """Log the result to ValidMind.
+
+        Args:
+            section_id (str): The section ID within the model document to insert the
+                test result.
+            content_id (str): The content ID to log the result to.
+            position (int): The position (index) within the section to insert the test
+                result.
+        """
+        run_async(
+            self.log_async,
+            content_id=content_id,
+        )
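The new `content_id` argument on `TestResult.log` / `log_async` routes the test description to a caller-chosen content block instead of the default `test_description:<result_id>` key, and the new `TextGenerationResult` uses the same mechanism. A sketch, assuming `result` is a `TestResult` returned by `run_test`; the IDs are illustrative:

```python
# Assumes `result` came from validmind.tests.run_test(...)
result.log(
    section_id="model_development",       # illustrative section of the documentation template
    content_id="custom_mi_description",   # new: description is stored under this content ID
)
```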
{validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/METADATA
CHANGED
@@ -1,11 +1,11 @@
 Metadata-Version: 2.3
 Name: validmind
-Version: 2.8.
+Version: 2.8.26
 Summary: ValidMind Library
 License: Commercial License
 Author: Andres Rodriguez
 Author-email: andres@validmind.ai
-Requires-Python: >=3.
+Requires-Python: >=3.9.0,<3.12
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -22,6 +22,7 @@ Requires-Dist: bert-score (>=0.3.13)
 Requires-Dist: catboost
 Requires-Dist: datasets (>=2.10.0,<3.0.0)
 Requires-Dist: evaluate
+Requires-Dist: h11 (>=0.16.0)
 Requires-Dist: ipywidgets
 Requires-Dist: kaleido (>=0.2.1,!=0.2.1.post1)
 Requires-Dist: langchain-openai (>=0.1.8) ; extra == "all" or extra == "llm"
@@ -53,7 +54,7 @@ Requires-Dist: statsmodels
 Requires-Dist: tabulate (>=0.8.9,<0.9.0)
 Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
 Requires-Dist: tiktoken
-Requires-Dist: torch (
+Requires-Dist: torch (==2.7.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
 Requires-Dist: tqdm
 Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
 Requires-Dist: xgboost (>=1.5.2,<3)
{validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
-validmind/__init__.py,sha256=
-validmind/__version__.py,sha256=
+validmind/__init__.py,sha256=7nOHbSRUKtpIuHvf6oQd4D9_R8oh1PQ2CkeU62S14A0,4329
+validmind/__version__.py,sha256=XTvkYgNn06R6oLNbCxMlIA63zR9bQTVXFHPFNHviOyA,23
 validmind/ai/test_descriptions.py,sha256=eBF09MAyqAAD-Ah7vxXVRbHxOmGx5_10ZkoJmMvEaEA,7123
-validmind/ai/utils.py,sha256=
-validmind/api_client.py,sha256=
+validmind/ai/utils.py,sha256=m0ru4h7z8Fe1dEOtXoonhgYKtdLMSEUakwacoATZbrs,8493
+validmind/api_client.py,sha256=WNAdiYc2NctAFc2itLdz-0mf3_4JPghW4pPF_VAn5jw,16970
 validmind/client.py,sha256=XKb4uc7yXVV_3NH9-zTrS9jCbLPX2zZZU12vKKlSpIc,19049
 validmind/client_config.py,sha256=O1gopTaNADM4ZVPj383AJTjcpjdxyEvUQY5cFt7nbIs,1366
 validmind/datasets/__init__.py,sha256=c0hQZN_6GrUEJxdFHdQaEsQrSYNABG84ZCY0H-PzOZk,260
@@ -60,6 +60,8 @@ validmind/datasets/regression/models/fred_loan_rates_model_3.pkl,sha256=IogZPcUQ
 validmind/datasets/regression/models/fred_loan_rates_model_4.pkl,sha256=cSxhpcrI4hCbxCwZwE2-nr7KObbWpDii3NzpECoXmmM,48292
 validmind/datasets/regression/models/fred_loan_rates_model_5.pkl,sha256=FkNLHq9xkPMbYks_vyMjFL371mw9SQYbP1iX9lY4Ljo,60343
 validmind/errors.py,sha256=yluOjbvvurjIAVpN6V1L5R1f_aXr7mmTrTFjzmgn_Uw,8268
+validmind/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+validmind/experimental/agents.py,sha256=UAn62qflCYnzS1m2XL_y3xUNaw0PJr9dRvNb-c-rUtI,2040
 validmind/html_templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 validmind/html_templates/content_blocks.py,sha256=vFMRS4Ogq4RZq88WzG3teNEOq3U4OLgLDzD3lBx4h-g,4050
 validmind/input_registry.py,sha256=bgZqJhrBCDoTV54Eq6YhNcU9yn5GjH0aidDwrnKm_pI,1043
@@ -118,7 +120,7 @@ validmind/tests/data_validation/LJungBox.py,sha256=jytIC1iOaV3g5kxEQ93RPftp1mO0H
 validmind/tests/data_validation/LaggedCorrelationHeatmap.py,sha256=Irh8SvFQELqqq2FPR5PgUbcdjuCgIgB7FaZHkyCxu7Y,4571
 validmind/tests/data_validation/MissingValues.py,sha256=elEhhwXTD68B8iLB2HTgAK-oM_i5yzJ8v32atK-F5ro,2962
 validmind/tests/data_validation/MissingValuesBarPlot.py,sha256=BB-yO2uXjWIqy_bNJ_rJ8oosHTzMl7acGIYDGUy69dI,5572
-validmind/tests/data_validation/MutualInformation.py,sha256=
+validmind/tests/data_validation/MutualInformation.py,sha256=8Sp8K75dP-F24l_WeqRpykri5--E1GTQLxnTarLhNUc,5157
 validmind/tests/data_validation/PearsonCorrelationMatrix.py,sha256=YU9WD3VURjzXyYvCTXcgZnFvmg2rjTOMwKtiZ57ZMJg,3873
 validmind/tests/data_validation/PhillipsPerronArch.py,sha256=4abwhMBcdxTxY9aMogL5hEvCyATnvHb66mGssE1AJuk,4254
 validmind/tests/data_validation/ProtectedClassesCombination.py,sha256=KOsSciNplk1A9DI-wS-m5qKm5u-7gDDDfceEusZiufo,6920
@@ -196,17 +198,17 @@ validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py,sha256=
 validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py,sha256=Y1N4AeYlzD2Mpcvd4BWVIOIWzHjycWxSRYp8J_gr5_k,5839
 validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py,sha256=M47btgIGdXon8F7phqqcXrExnO3DvHi-NSBdgDjy_OE,4752
 validmind/tests/model_validation/embeddings/utils.py,sha256=Hr8jpVB0YfaOEYsO_tiwhU1UgXoJFHHlRqFcHDNXHoU,1896
-validmind/tests/model_validation/ragas/AnswerCorrectness.py,sha256=
-validmind/tests/model_validation/ragas/AspectCritic.py,sha256=
-validmind/tests/model_validation/ragas/ContextEntityRecall.py,sha256=
-validmind/tests/model_validation/ragas/ContextPrecision.py,sha256=
-validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py,sha256=
-validmind/tests/model_validation/ragas/ContextRecall.py,sha256=
-validmind/tests/model_validation/ragas/Faithfulness.py,sha256=
-validmind/tests/model_validation/ragas/NoiseSensitivity.py,sha256=
-validmind/tests/model_validation/ragas/ResponseRelevancy.py,sha256=
-validmind/tests/model_validation/ragas/SemanticSimilarity.py,sha256=
-validmind/tests/model_validation/ragas/utils.py,sha256=
+validmind/tests/model_validation/ragas/AnswerCorrectness.py,sha256=unX2l4aVnRJTkGooKZ90HbLpxthvKqiLCDQvvuviexU,5682
+validmind/tests/model_validation/ragas/AspectCritic.py,sha256=ejtznzu-tWFy7Ex0AgYts4HSBqYVWQNKxPIZ_nv6MQM,7087
+validmind/tests/model_validation/ragas/ContextEntityRecall.py,sha256=9Uyr7d2zSFxWehHKq5DKt_g-vZRQQreJcgl59f5txNQ,5520
+validmind/tests/model_validation/ragas/ContextPrecision.py,sha256=kNswHsi2gtl88gGi9gUE40AGeqJS4X-M60zFAatnvHQ,5323
+validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py,sha256=HjP3Asb8tvnEHUnWLu3Z6eZi_mr8pmVfboXoowwuvQU,5042
+validmind/tests/model_validation/ragas/ContextRecall.py,sha256=oRNFXSFKuceK5zJj2KOJiYh5BK5xLm55ELRPTyHV5No,5209
+validmind/tests/model_validation/ragas/Faithfulness.py,sha256=cTRmMcW7zymBWDt8y9eywLmJsUHq7QU1I6xZPTQlkTw,5496
+validmind/tests/model_validation/ragas/NoiseSensitivity.py,sha256=9ZDoJbrd9L3yVhEyJBTyEbE6NC-yPne_gQQPeQyE2fY,6606
+validmind/tests/model_validation/ragas/ResponseRelevancy.py,sha256=mJoELbkNNNd9UhL-cJz27sif6_i1tyVZagIBFr51Xqo,5759
+validmind/tests/model_validation/ragas/SemanticSimilarity.py,sha256=ZjtLS7GRqVxYbY5PGzhGUDXJLKH8ItyH-5dDcu--nEQ,5008
+validmind/tests/model_validation/ragas/utils.py,sha256=BN07JJ2egOEzQmO8w6afsrhXs_uQ0RoYAPuOAaKFXrY,2785
 validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py,sha256=UXhiNK6ZakRgln968y3jAMgNVsj5LpgGCnSHDRUFrWw,2926
 validmind/tests/model_validation/sklearn/AdjustedRandIndex.py,sha256=7zlFapC21nVqXYc73FQxR0XeTit2l-h7F76xCFS9FUQ,2756
 validmind/tests/model_validation/sklearn/CalibrationCurve.py,sha256=EmW8UvT6gcBC-dw6zr43MbAGTBeVTPruHjYu5GWB5p4,4232
@@ -226,7 +228,7 @@ validmind/tests/model_validation/sklearn/MinimumF1Score.py,sha256=pQn9p15AUo5ref
 validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py,sha256=6YSITuOkQwq1UcxqTWHnjrpPTfN_9Mny-wDWdRFD8I4,3825
 validmind/tests/model_validation/sklearn/ModelParameters.py,sha256=CF3cZGJLxiABnf1CQ_u_iX_ylgvpElH3jF2DBXbXZJY,3060
 validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py,sha256=wDxGUXgfzLA80wfjoRz7CzHO8NiQfuJyxIfuVFOuLYA,4658
-validmind/tests/model_validation/sklearn/OverfitDiagnosis.py,sha256=
+validmind/tests/model_validation/sklearn/OverfitDiagnosis.py,sha256=FaTxA_OpTUxv92Zhi8oZ0KrUYjUWlVrte7HYgQqktPk,10557
 validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py,sha256=zdZe3f6n5WmWei9T2IUJGjYPjI0YRSoSXza5rfrYZ48,4226
 validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py,sha256=zm2aP_auO6khOmei_or_HhnFgMFQmeU6IgYJuuXYkHM,9045
 validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py,sha256=WF3htr_Z5BnVbxMV4Ehx_BUSWYXhouaqQ45MUbuU9co,3821
@@ -242,7 +244,7 @@ validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py,sha256=lvP
 validmind/tests/model_validation/sklearn/SilhouettePlot.py,sha256=F2RMMm1ilEwj6hGfF50n_9_n3JnQhwJBxQl8hY6xjuk,5076
 validmind/tests/model_validation/sklearn/TrainingTestDegradation.py,sha256=tSi2pnWxqSMkakebTLPHGHGn_7YSukPocNhVDixDul8,4519
 validmind/tests/model_validation/sklearn/VMeasure.py,sha256=2zkB6W4oYWPr03SETwjaQCle3_dGDItCqa3DQ4qRLcM,2841
-validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py,sha256=
+validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py,sha256=DtRhJZ1NIpzIf7F4jXOo5XUX7g2VRvZGmWHIBYuCeaE,12055
 validmind/tests/model_validation/sklearn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 validmind/tests/model_validation/statsmodels/AutoARIMA.py,sha256=4QNcEEY_iqt6wCzYwsBwZQ-aacZ1erX5uHbPtKmbTJk,4896
 validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py,sha256=yXouMfH8JWrD3o6IAoHjAeXHuj-nVSxV-_SVw8SBePw,4886
@@ -277,15 +279,15 @@ validmind/tests/ongoing_monitoring/ScoreBandsDrift.py,sha256=4nqu3yfiIlhs7RG-6Eg
 validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py,sha256=OIU6wEIH4VbwhCo6Qirl0YnzxQQbbhMhjFiMnmKoatA,7260
 validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py,sha256=xDPh6KWSy8YXmHV8SI0IpSVv42R-1H3ZpjMM0zHTDNs,5141
 validmind/tests/output.py,sha256=UXSZDiW_GD411QP2F9r2Vh7uXtb5a2y990bNRZhnZVQ,6153
-validmind/tests/prompt_validation/Bias.py,sha256=
-validmind/tests/prompt_validation/Clarity.py,sha256=
-validmind/tests/prompt_validation/Conciseness.py,sha256=
-validmind/tests/prompt_validation/Delimitation.py,sha256=
-validmind/tests/prompt_validation/NegativeInstruction.py,sha256=
-validmind/tests/prompt_validation/Robustness.py,sha256=
-validmind/tests/prompt_validation/Specificity.py,sha256=
+validmind/tests/prompt_validation/Bias.py,sha256=eYUlVPn3iqMJ4lnQbZxYty6UxvLd0sbPdbY1EtywV3w,5845
+validmind/tests/prompt_validation/Clarity.py,sha256=OlzhESenRUftai2L1fVI4rftLflp2_ztwr_llq3rvbU,4990
+validmind/tests/prompt_validation/Conciseness.py,sha256=RQkC3jH9c96PBYCBk6-MLuEqstkkWFOegunEcHsNyis,4734
+validmind/tests/prompt_validation/Delimitation.py,sha256=NXa1ScNrIejiGOslncrXavzM4k-vRmAloCmrZyxAWfg,4102
+validmind/tests/prompt_validation/NegativeInstruction.py,sha256=Miasr4VQ-sCA06_lCCI4oRf_AKNjh7jV_CvtmnYxUso,5417
+validmind/tests/prompt_validation/Robustness.py,sha256=Qy82t7dtdpCcR-SPDpD43QaHomDI6e2IJIBBO-MCdwA,5848
+validmind/tests/prompt_validation/Specificity.py,sha256=VtUU7t2vDT8_fWiz9OiTEsMN-yjn3dNU7couwcbMdik,4857
 validmind/tests/prompt_validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-validmind/tests/prompt_validation/ai_powered_test.py,sha256=
+validmind/tests/prompt_validation/ai_powered_test.py,sha256=sWMf9fRXAkOpI5JYdhLHmJXlwnthjUDZGoLgCZlQZxo,2240
 validmind/tests/run.py,sha256=ftUCywJbHQ0vbZ7096iz8yq0htLbQbOWhvURjCqiqog,14211
 validmind/tests/test_providers.py,sha256=S0_yNYAor_MX5joRJntrVjV8J3ypvUcaaSqtkBqhOsI,6021
 validmind/tests/utils.py,sha256=sPnk9HWIb0IoySqL88h7uP3LixfrfKFgFebnyTUP5EE,3950
@@ -314,17 +316,17 @@ validmind/vm_models/dataset/utils.py,sha256=g6mBPrBmVYf8wJAlTxeg9DTiNvts4ZaaT5mb
 validmind/vm_models/figure.py,sha256=ZMO_nIIleNhkBV1vJeF_UUsVDCzrXNOYwV1Lbg9E0XY,6303
 validmind/vm_models/input.py,sha256=nTBQB6aqirhF-0Gmg5mYc4_vNyypvbYUfahMovcK02M,1095
 validmind/vm_models/model.py,sha256=s9pPIprHrju-qmGbzOZBcSHjZ_xgSv5ACXk92U1hEFY,6489
-validmind/vm_models/result/__init__.py,sha256=
+validmind/vm_models/result/__init__.py,sha256=c0vMWMSY0O6ZeSCf0HfrWAI5t--4FKLEW5cZ2EZ70Ms,443
 validmind/vm_models/result/result.jinja,sha256=Yvovwm5gInCBukFRlvJXNlDIUpl2eFz4dz1lS3Sn_Gc,311
-validmind/vm_models/result/result.py,sha256=
+validmind/vm_models/result/result.py,sha256=NRb90F1kYHeYitItiUKYVZ32d81UKq6X1RH9DHMI9Fo,21282
 validmind/vm_models/result/utils.py,sha256=kjU8yaDBX874gebdKLA2KcCyW6ojk_nSTBZxHG7Gszc,4155
 validmind/vm_models/test_suite/__init__.py,sha256=tfTYd8yvzsDXzk5WDKMwCzPAbvkVUyEvtY5z5BPy-zk,215
 validmind/vm_models/test_suite/runner.py,sha256=JqW8LW4X1Ri2C6wSsAGSki-JxGUGV8zmruOnxybmZ1s,5432
 validmind/vm_models/test_suite/summary.py,sha256=7P4zhfeU7a3I1MMBn8f7s-2lzdAz7U4y6LblpR89_vE,5401
 validmind/vm_models/test_suite/test.py,sha256=C8xPGKSyYF9oMJ3VegwFJDF7cwYlIgtQoQ7nzXIS1uc,3914
 validmind/vm_models/test_suite/test_suite.py,sha256=CciC6IhrLEeWwcpY3Np8EmQCB8XEF2ljwEXcvmNYgZc,5090
-validmind-2.8.
-validmind-2.8.
-validmind-2.8.
-validmind-2.8.
-validmind-2.8.
+validmind-2.8.26.dist-info/LICENSE,sha256=XonPUfwjvrC5Ombl3y-ko0Wubb1xdG_7nzvIbkZRKHw,35772
+validmind-2.8.26.dist-info/METADATA,sha256=cXBgejYYCohKO95F4HEa-uamzw0cLjgKr8LlBhMQ0eA,6061
+validmind-2.8.26.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+validmind-2.8.26.dist-info/entry_points.txt,sha256=HuW7YyOv9u_OEWpViQXtv0nfoI67uieJHawKWA4Hv9A,76
+validmind-2.8.26.dist-info/RECORD,,
{validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/LICENSE
File without changes
{validmind-2.8.22.dist-info → validmind-2.8.26.dist-info}/entry_points.txt
File without changes