validmind 2.5.18__py3-none-any.whl → 2.5.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +7 -46
- validmind/__version__.py +1 -1
- validmind/ai/test_result_description/context.py +2 -2
- validmind/api_client.py +131 -266
- validmind/client_config.py +1 -3
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/nlp/__init__.py +1 -1
- validmind/errors.py +20 -30
- validmind/tests/data_validation/ProtectedClassesCombination.py +17 -9
- validmind/tests/data_validation/ProtectedClassesDisparity.py +12 -4
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +18 -10
- validmind/tests/load.py +25 -5
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +12 -6
- validmind/tests/model_validation/ragas/AnswerRelevance.py +12 -6
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +12 -6
- validmind/tests/model_validation/ragas/AspectCritique.py +19 -13
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +12 -6
- validmind/tests/model_validation/ragas/ContextPrecision.py +12 -6
- validmind/tests/model_validation/ragas/ContextRecall.py +12 -6
- validmind/tests/model_validation/ragas/ContextUtilization.py +12 -6
- validmind/tests/model_validation/ragas/Faithfulness.py +12 -6
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +12 -6
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -2
- validmind/tests/run.py +219 -116
- validmind/vm_models/test/result_wrapper.py +4 -4
- {validmind-2.5.18.dist-info → validmind-2.5.23.dist-info}/METADATA +12 -12
- {validmind-2.5.18.dist-info → validmind-2.5.23.dist-info}/RECORD +30 -30
- {validmind-2.5.18.dist-info → validmind-2.5.23.dist-info}/WHEEL +1 -1
- {validmind-2.5.18.dist-info → validmind-2.5.23.dist-info}/LICENSE +0 -0
- {validmind-2.5.18.dist-info → validmind-2.5.23.dist-info}/entry_points.txt +0 -0
validmind/tests/run.py
CHANGED
@@ -2,6 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+import itertools
 from itertools import product
 from typing import Any, Dict, List, Union
 from uuid import uuid4
@@ -137,7 +138,7 @@ def _combine_figures(figure_lists: List[List[Any]], input_groups: List[Dict[str,
     title_template = "{current_title}({input_description})"
 
     for idx, figures in enumerate(figure_lists):
-        input_group = input_groups[idx]
+        input_group = input_groups[idx]["inputs"]
         if is_plotly_figure(figures[0].figure):
             _update_plotly_titles(figures, input_group, title_template)
         elif is_matplotlib_figure(figures[0].figure):
@@ -171,63 +172,55 @@ def _combine_unit_metrics(results: List[MetricResultWrapper]):
 def metric_comparison(
     results: List[MetricResultWrapper],
     test_id: TestID,
+    input_params_groups: Union[Dict[str, List[Any]], List[Dict[str, Any]]],
     output_template: str = None,
     generate_description: bool = True,
 ):
     """Build a comparison result for multiple metric results"""
     ref_id = str(uuid4())
 
+    # Treat param_groups and input_groups as empty lists if they are None or empty
+    input_params_groups = input_params_groups or [{}]
+
     input_group_strings = []
 
-    for
+    for input_params in input_params_groups:
         new_group = {}
-        for
+        for param_k, param_v in input_params["params"].items():
+            new_group[param_k] = param_v
+        for metric_k, metric_v in input_params["inputs"].items():
+            # Process values in the input group
+            if isinstance(metric_v, str):
+                new_group[metric_k] = metric_v
+            elif hasattr(metric_v, "input_id"):
+                new_group[metric_k] = metric_v.input_id
+            elif isinstance(metric_v, list) and all(
+                hasattr(item, "input_id") for item in metric_v
+            ):
+                new_group[metric_k] = ", ".join([item.input_id for item in metric_v])
             else:
-                raise ValueError(f"Unsupported type for value: {
+                raise ValueError(f"Unsupported type for value: {metric_v}")
         input_group_strings.append(new_group)
 
     # handle unit metrics (scalar values) by adding it to the summary
     _combine_unit_metrics(results)
 
-    # Check if the results list contains a result object with figures
-    if any(hasattr(result, "figures") and result.figures for result in results):
-        # Compute merged figures only if there is at least one result with figures
-        merged_figures = _combine_figures(
-            [result.figures for result in results],
-            input_groups,
-        )
-        # Patch figure metadata so they are connected to the comparison result
-        if merged_figures and len(merged_figures):
-            for i, figure in enumerate(merged_figures):
-                figure.key = f"{figure.key}-{i}"
-                figure.metadata["_name"] = test_id
-                figure.metadata["_ref_id"] = ref_id
-    else:
-        merged_figures = None
+    merged_summary = _combine_summaries(
+        [
+            {"inputs": input_group_strings[i], "summary": result.metric.summary}
+            for i, result in enumerate(results)
+        ]
+    )
+    merged_figures = _combine_figures(
+        [result.figures for result in results], input_params_groups
+    )
+
+    # Patch figure metadata so they are connected to the comparison result
+    if merged_figures and len(merged_figures):
+        for i, figure in enumerate(merged_figures):
+            figure.key = f"{figure.key}-{i}"
+            figure.metadata["_name"] = test_id
+            figure.metadata["_ref_id"] = ref_id
 
     return MetricResultWrapper(
         result_id=test_id,
@@ -236,14 +229,14 @@ def metric_comparison(
                 test_id=test_id,
                 default_description=f"Comparison test result for {test_id}",
                 summary=merged_summary.serialize() if merged_summary else None,
-                figures=merged_figures
+                figures=merged_figures,
                 should_generate=generate_description,
             ),
         ],
         inputs=[
            item.input_id if hasattr(item, "input_id") else item
-            for group in
-            for input in group.values()
+            for group in input_params_groups
+            for input in group["inputs"].values()
            for item in (input if isinstance(input, list) else [input])
            if hasattr(item, "input_id") or isinstance(item, str)
        ],
@@ -333,39 +326,63 @@ def threshold_test_comparison(
 
 def run_comparison_test(
     test_id: TestID,
-    input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]],
+    input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]] = None,
+    inputs: Dict[str, Any] = None,
     name: str = None,
     unit_metrics: List[TestID] = None,
+    param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]] = None,
     params: Dict[str, Any] = None,
     show: bool = True,
     output_template: str = None,
     generate_description: bool = True,
 ):
     """Run a comparison test"""
-    if
+    if input_grid:
+        if isinstance(input_grid, dict):
+            input_groups = _cartesian_product(input_grid)
+        else:
+            input_groups = input_grid
     else:
-        input_groups =
+        input_groups = list(inputs) if inputs else []
 
+    if param_grid:
+        if isinstance(param_grid, dict):
+            param_groups = _cartesian_product(param_grid)
+        else:
+            param_groups = param_grid
+    else:
+        param_groups = list(params) if inputs else []
+
+    input_groups = input_groups or [{}]
+    param_groups = param_groups or [{}]
+    # Use itertools.product to compute the Cartesian product
+    inputs_params_product = [
+        {
+            "inputs": item1,
+            "params": item2,
+        }  # Merge dictionaries from input_groups and param_groups
+        for item1, item2 in itertools.product(input_groups, param_groups)
+    ]
     results = [
         run_test(
             test_id,
             name=name,
             unit_metrics=unit_metrics,
-            inputs=inputs,
+            inputs=inputs_params["inputs"],
             show=False,
-            params=params,
+            params=inputs_params["params"],
             __generate_description=False,
         )
-        for
+        for inputs_params in (inputs_params_product or [{}])
     ]
     if isinstance(results[0], MetricResultWrapper):
         func = metric_comparison
     else:
         func = threshold_test_comparison
 
-    result = func(
+    result = func(
+        results, test_id, inputs_params_product, output_template, generate_description
+    )
 
     if show:
         result.show()
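
`run_comparison_test` now crosses every input group with every param group before dispatching the individual `run_test` calls. A minimal sketch of that expansion, with hypothetical grid values standing in for initialized ValidMind inputs:

```python
import itertools

# Hypothetical grids; in practice the input values are VM datasets/models.
input_groups = [{"dataset": "train_ds"}, {"dataset": "test_ds"}]
param_groups = [{"threshold": 0.5}, {"threshold": 0.7}]

inputs_params_product = [
    {"inputs": inputs, "params": params}
    for inputs, params in itertools.product(input_groups, param_groups)
]

for combo in inputs_params_product:
    print(combo)
# {'inputs': {'dataset': 'train_ds'}, 'params': {'threshold': 0.5}}
# {'inputs': {'dataset': 'train_ds'}, 'params': {'threshold': 0.7}}
# {'inputs': {'dataset': 'test_ds'}, 'params': {'threshold': 0.5}}
# {'inputs': {'dataset': 'test_ds'}, 'params': {'threshold': 0.7}}
```

Each combination produces one underlying test run, and the runs are then merged into a single comparison result.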
@@ -376,6 +393,7 @@ def run_comparison_test(
 
 def run_test(
     test_id: TestID = None,
     params: Dict[str, Any] = None,
+    param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]] = None,
     inputs: Dict[str, Any] = None,
     input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]] = None,
     name: str = None,
@@ -385,83 +403,81 @@ def run_test(
     __generate_description: bool = True,
     **kwargs,
 ) -> Union[MetricResultWrapper, ThresholdTestResultWrapper]:
-    """Run a test by test ID
+    """Run a test by test ID.
+
+    test_id (TestID, optional): The test ID to run. Not required if `unit_metrics` is provided.
+    params (dict, optional): A dictionary of parameters to pass into the test. Params
+        are used to customize the test behavior and are specific to each test. See the
+        test details for more information on the available parameters. Defaults to None.
+    param_grid (Union[Dict[str, List[Any]], List[Dict[str, Any]]], optional): To run
+        a comparison test, provide either a dictionary of parameters where the keys are
+        the parameter names and the values are lists of different parameters, or a list of
+        dictionaries where each dictionary is a set of parameters to run the test with.
+        This will run the test multiple times with different sets of parameters and then
+        combine the results into a single output. When passing a dictionary, the grid
+        will be created by taking the Cartesian product of the parameter lists. Its simply
+        a more convenient way of forming the param grid as opposed to passing a list of
+        all possible combinations. Defaults to None.
+    inputs (Dict[str, Any], optional): A dictionary of test inputs to pass into the
+        test. Inputs are either models or datasets that have been initialized using
+        vm.init_model() or vm.init_dataset(). Defaults to None.
+    input_grid (Union[Dict[str, List[Any]], List[Dict[str, Any]]], optional): To run
+        a comparison test, provide either a dictionary of inputs where the keys are
+        the input names and the values are lists of different inputs, or a list of
+        dictionaries where each dictionary is a set of inputs to run the test with.
+        This will run the test multiple times with different sets of inputs and then
+        combine the results into a single output. When passing a dictionary, the grid
+        will be created by taking the Cartesian product of the input lists. Its simply
+        a more convenient way of forming the input grid as opposed to passing a list of
+        all possible combinations. Defaults to None.
+    name (str, optional): The name of the test (used to create a composite metric
+        out of multiple unit metrics) - required when running multiple unit metrics
+    unit_metrics (list, optional): A list of unit metric IDs to run as a composite
+        metric - required when running multiple unit metrics
+    output_template (str, optional): A jinja2 html template to customize the output
+        of the test. Defaults to None.
+    show (bool, optional): Whether to display the results. Defaults to True.
+    **kwargs: Keyword inputs to pass into the test (same as `inputs` but as keyword
+        args instead of a dictionary):
+        - dataset: A validmind Dataset object or a Pandas DataFrame
+        - model: A model to use for the test
+        - models: A list of models to use for the test
+        - dataset: A validmind Dataset object or a Pandas DataFrame
     """
-    if not test_id and not name and not unit_metrics:
-        raise ValueError(
-            "`test_id` or `name` and `unit_metrics` must be provided to run a test"
-        )
 
-    if (input_grid and kwargs) or (input_grid and inputs):
-        raise ValueError(
-            "When providing an `input_grid`, you cannot also provide `inputs` or `kwargs`"
-        )
+    # Validate input arguments with helper functions
+    validate_test_inputs(test_id, name, unit_metrics)
+    validate_grid_inputs(input_grid, kwargs, inputs, param_grid, params)
 
+    # Handle composite metric creation
     if unit_metrics:
-        test_id = f"validmind.composite_metric.{metric_id_name}" or test_id
+        test_id = generate_composite_test_id(name, test_id)
 
-    if
+    # Run comparison tests if applicable
+    if input_grid or param_grid:
+        return run_comparison_test_with_grids(
             test_id,
+            inputs,
             input_grid,
+            param_grid,
+            name,
+            unit_metrics,
+            params,
+            output_template,
+            show,
+            __generate_description,
         )
 
+    # Run unit metric tests
     if test_id.startswith("validmind.unit_metrics"):
         # TODO: as we move towards a more unified approach to metrics
         # we will want to make everything functional and remove the
         # separation between unit metrics and "normal" metrics
         return run_metric(test_id, inputs=inputs, params=params, show=show)
 
-            unit_metrics=unit_metrics, metric_name=metric_id_name
-        )
-        if error:
-            raise LoadTestError(error)
-    else:
-        TestClass = load_test(test_id, reload=True)
+    # Load the appropriate test class
+    TestClass = load_test_class(test_id, unit_metrics, name)
 
+    # Create and run the test
     test = TestClass(
         test_id=test_id,
         context=TestContext(),
@@ -477,3 +493,90 @@ def run_test(
         test.result.show()
 
     return test.result
+
+
+def validate_test_inputs(test_id, name, unit_metrics):
+    """Validate the main test inputs for `test_id`, `name`, and `unit_metrics`."""
+    if not test_id and not (name and unit_metrics):
+        raise ValueError(
+            "`test_id` or both `name` and `unit_metrics` must be provided to run a test"
+        )
+
+    if bool(unit_metrics) != bool(name):
+        raise ValueError("`name` and `unit_metrics` must be provided together")
+
+
+def validate_grid_inputs(input_grid, kwargs, inputs, param_grid, params):
+    """Validate the grid inputs to avoid conflicting parameters."""
+    if input_grid and (kwargs or inputs):
+        raise ValueError("Cannot provide `input_grid` along with `inputs` or `kwargs`")
+
+    if param_grid and (kwargs or params):
+        raise ValueError("Cannot provide `param_grid` along with `params` or `kwargs`")
+
+
+def generate_composite_test_id(name, test_id):
+    """Generate a composite test ID if unit metrics are provided."""
+    metric_id_name = "".join(word.capitalize() for word in name.split())
+    return f"validmind.composite_metric.{metric_id_name}" or test_id
+
+
+def run_comparison_test_with_grids(
+    test_id,
+    inputs,
+    input_grid,
+    param_grid,
+    name,
+    unit_metrics,
+    params,
+    output_template,
+    show,
+    generate_description,
+):
+    """Run a comparison test based on the presence of input and param grids."""
+    if input_grid and param_grid:
+        return run_comparison_test(
+            test_id,
+            input_grid,
+            name=name,
+            unit_metrics=unit_metrics,
+            param_grid=param_grid,
+            output_template=output_template,
+            show=show,
+            generate_description=generate_description,
+        )
+    if input_grid:
+        return run_comparison_test(
+            test_id,
+            input_grid,
+            name=name,
+            unit_metrics=unit_metrics,
+            params=params,
+            output_template=output_template,
+            show=show,
+            generate_description=generate_description,
+        )
+    if param_grid:
+        return run_comparison_test(
+            test_id,
+            inputs=inputs,
+            name=name,
+            unit_metrics=unit_metrics,
+            param_grid=param_grid,
+            output_template=output_template,
+            show=show,
+            generate_description=generate_description,
+        )
+
+
+def load_test_class(test_id, unit_metrics, name):
+    """Load the appropriate test class based on `test_id` and unit metrics."""
+    if unit_metrics:
+        metric_id_name = "".join(word.capitalize() for word in name.split())
+        error, TestClass = load_composite_metric(
+            unit_metrics=unit_metrics, metric_name=metric_id_name
+        )
+        if error:
+            raise LoadTestError(error)
+        return TestClass
+    return load_test(test_id, reload=True)
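
For context, a hedged sketch of how the new `input_grid` / `param_grid` arguments documented above might be used; the dataset, model, and parameter names are hypothetical, and `df_train`, `df_test`, and `clf` are assumed to already exist in the session:

```python
import validmind as vm
from validmind.tests import run_test

# Hypothetical inputs initialized through the standard ValidMind entry points.
train_ds = vm.init_dataset(dataset=df_train, input_id="train_ds", target_column="target")
test_ds = vm.init_dataset(dataset=df_test, input_id="test_ds", target_column="target")
model = vm.init_model(clf, input_id="classifier")

# Runs the test once per input combination and merges the results into one output.
result = run_test(
    "validmind.model_validation.sklearn.ClassifierPerformance",
    input_grid={"dataset": [train_ds, test_ds], "model": [model]},
    # param_grid={"some_param": [0.5, 0.7]},  # hypothetical parameter grid
)
```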
validmind/vm_models/test/result_wrapper.py
CHANGED
@@ -378,8 +378,8 @@ class MetricResultWrapper(ResultWrapper):
             self.metric.summary = self._get_filtered_summary()
 
         tasks.append(
-            api_client.
+            api_client.log_metric_result(
+                metric=self.metric,
                 inputs=self.inputs,
                 output_template=self.output_template,
                 section_id=section_id,
@@ -388,7 +388,7 @@ class MetricResultWrapper(ResultWrapper):
         )
 
         if self.figures:
-            tasks.
+            tasks.extend([api_client.log_figure(figure) for figure in self.figures])
 
         if hasattr(self, "result_metadata") and self.result_metadata:
             description = self.result_metadata[0].get("text", "")
@@ -474,7 +474,7 @@ class ThresholdTestResultWrapper(ResultWrapper):
         ]
 
         if self.figures:
-            tasks.
+            tasks.extend([api_client.log_figure(figure) for figure in self.figures])
 
         if hasattr(self, "result_metadata") and self.result_metadata:
             description = self.result_metadata[0].get("text", "")
{validmind-2.5.18.dist-info → validmind-2.5.23.dist-info}/METADATA
CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 2.5.18
-Summary: ValidMind
+Version: 2.5.23
+Summary: ValidMind Library
 License: Commercial License
 Author: Andres Rodriguez
 Author-email: andres@validmind.ai
@@ -61,28 +61,28 @@ Requires-Dist: xgboost (>=1.5.2,<3)
 Requires-Dist: ydata-profiling
 Description-Content-Type: text/markdown
 
-# ValidMind
+# ValidMind Library
 
-ValidMind’s Python
+ValidMind’s Python Library contains a suite of developer tools and methods designed to automate
 the documentation and validation of your models.
 
-The
+The Library is designed to be model agnostic. If your model is built in Python, ValidMind's
 Python library will provide all the standard functionality without requiring your developers to rewrite any functions.
 
-The
+The Library provides a rich suite of documentation tools and test suites, from documenting
 descriptions of your dataset to testing your models for weak spots and overfit areas. The Developer
 Framework helps you automate the generation of model documentation by feeding the ValidMind platform with
 documentation artifacts and test results to the ValidMind platform.
 
 ## Installation
 
-To install the ValidMind
+To install the ValidMind Library and all optional dependencies, run:
 
 ```bash
 pip install validmind[all]
 ```
 
-To install the
+To install the Library without optional dependencies (core functionality only), run:
 
 ```bash
 pip install validmind
@@ -90,7 +90,7 @@ pip install validmind
 
 ### Extra dependencies
 
-The
+The Library has optional dependencies that can be installed separately to support additional model types and tests.
 
 - **LLM Support**: To be able to run tests for Large Language Models (LLMs), install the `llm` extra:
 
@@ -98,19 +98,19 @@ The Developer Framework has optional dependencies that can be installed separate
 pip install validmind[llm]
 ```
 
-- **PyTorch Models**: To use pytorch models with the
+- **PyTorch Models**: To use pytorch models with the Library, install the `torch` extra:
 
 ```bash
 pip install validmind[torch]
 ```
 
-- **Hugging Face Transformers**: To use Hugging Face Transformers models with the
+- **Hugging Face Transformers**: To use Hugging Face Transformers models with the Library, install the `transformers` extra:
 
 ```bash
 pip install validmind[transformers]
 ```
 
-- **R Models**: To use R models with the
+- **R Models**: To use R models with the Library, install the `r` extra:
 
 ```bash
 pip install validmind[r-support]