validmind 2.8.12__py3-none-any.whl → 2.8.22__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (61)
  1. validmind/__init__.py +6 -5
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +13 -9
  4. validmind/ai/utils.py +2 -2
  5. validmind/api_client.py +75 -32
  6. validmind/client.py +111 -100
  7. validmind/client_config.py +3 -3
  8. validmind/datasets/classification/__init__.py +7 -3
  9. validmind/datasets/credit_risk/lending_club.py +28 -16
  10. validmind/datasets/nlp/cnn_dailymail.py +10 -4
  11. validmind/datasets/regression/__init__.py +22 -5
  12. validmind/errors.py +17 -7
  13. validmind/input_registry.py +1 -1
  14. validmind/logging.py +44 -35
  15. validmind/models/foundation.py +2 -2
  16. validmind/models/function.py +10 -3
  17. validmind/template.py +33 -24
  18. validmind/test_suites/__init__.py +2 -2
  19. validmind/tests/_store.py +13 -4
  20. validmind/tests/comparison.py +65 -33
  21. validmind/tests/data_validation/ClassImbalance.py +3 -1
  22. validmind/tests/data_validation/DatasetDescription.py +2 -23
  23. validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
  24. validmind/tests/data_validation/Skewness.py +7 -6
  25. validmind/tests/decorator.py +14 -11
  26. validmind/tests/load.py +38 -24
  27. validmind/tests/model_validation/ragas/AnswerCorrectness.py +4 -2
  28. validmind/tests/model_validation/ragas/ContextEntityRecall.py +4 -2
  29. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -2
  30. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +4 -2
  31. validmind/tests/model_validation/ragas/ContextRecall.py +4 -2
  32. validmind/tests/model_validation/ragas/Faithfulness.py +4 -2
  33. validmind/tests/model_validation/ragas/ResponseRelevancy.py +4 -2
  34. validmind/tests/model_validation/ragas/SemanticSimilarity.py +4 -2
  35. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +13 -3
  36. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -1
  37. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +28 -25
  38. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
  39. validmind/tests/output.py +66 -11
  40. validmind/tests/run.py +28 -14
  41. validmind/tests/test_providers.py +28 -35
  42. validmind/tests/utils.py +17 -4
  43. validmind/unit_metrics/__init__.py +1 -1
  44. validmind/utils.py +295 -31
  45. validmind/vm_models/dataset/dataset.py +83 -43
  46. validmind/vm_models/dataset/utils.py +5 -3
  47. validmind/vm_models/figure.py +6 -6
  48. validmind/vm_models/input.py +6 -5
  49. validmind/vm_models/model.py +5 -5
  50. validmind/vm_models/result/result.py +122 -43
  51. validmind/vm_models/result/utils.py +5 -5
  52. validmind/vm_models/test_suite/__init__.py +5 -0
  53. validmind/vm_models/test_suite/runner.py +5 -5
  54. validmind/vm_models/test_suite/summary.py +20 -2
  55. validmind/vm_models/test_suite/test.py +6 -6
  56. validmind/vm_models/test_suite/test_suite.py +10 -10
  57. {validmind-2.8.12.dist-info → validmind-2.8.22.dist-info}/METADATA +3 -4
  58. {validmind-2.8.12.dist-info → validmind-2.8.22.dist-info}/RECORD +61 -60
  59. {validmind-2.8.12.dist-info → validmind-2.8.22.dist-info}/WHEEL +1 -1
  60. {validmind-2.8.12.dist-info → validmind-2.8.22.dist-info}/LICENSE +0 -0
  61. {validmind-2.8.12.dist-info → validmind-2.8.22.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py CHANGED
@@ -47,7 +47,7 @@ def _compute_metrics(
         None: The computed metrics are appended to the `results` dictionary in-place.
     """
     results["Slice"].append(str(region))
-    results["Shape"].append(df_region.shape[0])
+    results["Number of Records"].append(df_region.shape[0])
     results["Feature"].append(feature_column)

     # Check if df_region is an empty dataframe and if so, append 0 to all metrics
@@ -222,7 +222,7 @@ def WeakspotsDiagnosis(
     thresholds = thresholds or DEFAULT_THRESHOLDS
     thresholds = {k.title(): v for k, v in thresholds.items()}

-    results_headers = ["Slice", "Shape", "Feature"]
+    results_headers = ["Slice", "Number of Records", "Feature"]
     results_headers.extend(metrics.keys())

     figures = []
@@ -236,19 +236,20 @@ def WeakspotsDiagnosis(
         feature_columns
         + [datasets[1].target_column, datasets[1].prediction_column(model)]
     ]
-
+    results_1 = pd.DataFrame()
+    results_2 = pd.DataFrame()
     for feature in feature_columns:
         bins = 10
         if feature in datasets[0].feature_columns_categorical:
             bins = len(df_1[feature].unique())
         df_1["bin"] = pd.cut(df_1[feature], bins=bins)

-        results_1 = {k: [] for k in results_headers}
-        results_2 = {k: [] for k in results_headers}
+        r1 = {k: [] for k in results_headers}
+        r2 = {k: [] for k in results_headers}

         for region, df_region in df_1.groupby("bin"):
             _compute_metrics(
-                results=results_1,
+                results=r1,
                 metrics=metrics,
                 region=region,
                 df_region=df_region,
@@ -260,7 +261,7 @@ def WeakspotsDiagnosis(
                 (df_2[feature] > region.left) & (df_2[feature] <= region.right)
             ]
             _compute_metrics(
-                results=results_2,
+                results=r2,
                 metrics=metrics,
                 region=region,
                 df_region=df_2_region,
@@ -271,8 +272,8 @@ def WeakspotsDiagnosis(

         for metric in metrics.keys():
             fig, df = _plot_weak_spots(
-                results_1=results_1,
-                results_2=results_2,
+                results_1=r1,
+                results_2=r2,
                 feature_column=feature,
                 metric=metric,
                 threshold=thresholds[metric],
@@ -284,6 +285,8 @@ def WeakspotsDiagnosis(
             # rely on visual assessment for this test for now.
             if not df[df[list(thresholds.keys())].lt(thresholds).any(axis=1)].empty:
                 passed = False
+        results_1 = pd.concat([results_1, pd.DataFrame(r1)])
+        results_2 = pd.concat([results_2, pd.DataFrame(r2)])

     return (
         pd.concat(
@@ -291,7 +294,9 @@ def WeakspotsDiagnosis(
                 pd.DataFrame(results_1).assign(Dataset=datasets[0].input_id),
                 pd.DataFrame(results_2).assign(Dataset=datasets[1].input_id),
             ]
-        ).sort_values(["Feature", "Dataset"]),
+        )
+        .reset_index(drop=True)
+        .sort_values(["Feature", "Dataset"]),
         *figures,
         passed,
     )
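The refactor above accumulates the per-feature slice metrics into results_1/results_2 DataFrames and renames the per-slice count column to "Number of Records". A minimal usage sketch (a hedged illustration, not from this diff: vm_train_ds, vm_test_ds, and vm_model stand for inputs already created with vm.init_dataset / vm.init_model):

import validmind as vm

result = vm.tests.run_test(
    "validmind.model_validation.sklearn.WeakspotsDiagnosis",
    inputs={"datasets": [vm_train_ds, vm_test_ds], "model": vm_model},
)
result.show()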
validmind/tests/output.py CHANGED
@@ -9,6 +9,7 @@ from uuid import uuid4
 import numpy as np
 import pandas as pd

+from validmind.utils import is_html, md_to_html
 from validmind.vm_models.figure import (
     Figure,
     is_matplotlib_figure,
@@ -77,30 +78,72 @@ class FigureOutputHandler(OutputHandler):

 class TableOutputHandler(OutputHandler):
     def can_handle(self, item: Any) -> bool:
-        return isinstance(item, (list, pd.DataFrame, dict, ResultTable))
+        return isinstance(item, (list, pd.DataFrame, dict, ResultTable, tuple))
+
+    def _convert_simple_type(self, data: Any) -> pd.DataFrame:
+        """Convert a simple data type to a DataFrame."""
+        if isinstance(data, dict):
+            return pd.DataFrame([data])
+        elif data is None:
+            return pd.DataFrame()
+        else:
+            raise ValueError(f"Cannot convert {type(data)} to DataFrame")
+
+    def _convert_list(self, data_list: List) -> pd.DataFrame:
+        """Convert a list to a DataFrame."""
+        if not data_list:
+            return pd.DataFrame()
+
+        try:
+            return pd.DataFrame(data_list)
+        except Exception as e:
+            # If conversion fails, try to handle common cases
+            if all(
+                isinstance(item, (int, float, str, bool, type(None)))
+                for item in data_list
+            ):
+                return pd.DataFrame({"Values": data_list})
+            else:
+                raise ValueError(f"Could not convert list to DataFrame: {e}")
+
+    def _convert_to_dataframe(self, table_data: Any) -> pd.DataFrame:
+        """Convert various data types to a pandas DataFrame."""
+        # Handle special cases by type
+        if isinstance(table_data, pd.DataFrame):
+            return table_data
+        elif isinstance(table_data, (dict, str, type(None))):
+            return self._convert_simple_type(table_data)
+        elif isinstance(table_data, tuple):
+            return self._convert_list(list(table_data))
+        elif isinstance(table_data, list):
+            return self._convert_list(table_data)
+        else:
+            # If we reach here, we don't know how to handle this type
+            raise ValueError(
+                f"Invalid table format: must be a list of dictionaries or a DataFrame, got {type(table_data)}"
+            )

     def process(
         self,
-        item: Union[List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable],
+        item: Union[
+            List[Dict[str, Any]], pd.DataFrame, Dict[str, Any], ResultTable, str, tuple
+        ],
         result: TestResult,
     ) -> None:
+        # Convert to a dictionary of tables if not already
         tables = item if isinstance(item, dict) else {"": item}

         for table_name, table_data in tables.items():
-            # if already a ResultTable, add it directly
+            # If already a ResultTable, add it directly
             if isinstance(table_data, ResultTable):
                 result.add_table(table_data)
                 continue

-            if not isinstance(table_data, (list, pd.DataFrame)):
-                raise ValueError(
-                    "Invalid table format: must be a list of dictionaries or a DataFrame"
-                )
-
-            if isinstance(table_data, list):
-                table_data = pd.DataFrame(table_data)
+            # Convert the data to a DataFrame using our helper method
+            df = self._convert_to_dataframe(table_data)

-            result.add_table(ResultTable(data=table_data, title=table_name or None))
+            # Add the resulting DataFrame as a table to the resul
+            result.add_table(ResultTable(data=df, title=table_name or None))


 class RawDataOutputHandler(OutputHandler):
@@ -111,6 +154,17 @@ class RawDataOutputHandler(OutputHandler):
         result.raw_data = item


+class StringOutputHandler(OutputHandler):
+    def can_handle(self, item: Any) -> bool:
+        return isinstance(item, str)
+
+    def process(self, item: Any, result: TestResult) -> None:
+        if not is_html(item):
+            item = md_to_html(item, mathml=True)
+
+        result.description = item
+
+
 def process_output(item: Any, result: TestResult) -> None:
     """Process a single test output item and update the TestResult."""
     handlers = [
@@ -119,6 +173,7 @@ def process_output(item: Any, result: TestResult) -> None:
         FigureOutputHandler(),
         TableOutputHandler(),
         RawDataOutputHandler(),
+        StringOutputHandler(),
     ]

     for handler in handlers:
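Taken together, these output.py changes let a test return dicts of tables, plain lists or tuples, and markdown strings (which become the result description). A hedged sketch of a custom test exercising the new handlers, assuming the @vm.test decorator registration; the test ID "my_tests.DataOverview" is a placeholder:

import validmind as vm

@vm.test("my_tests.DataOverview")
def DataOverview(dataset):
    df = dataset.df
    tables = {
        "Summary Statistics": df.describe(),  # DataFrame passed through unchanged
        "Column Names": list(df.columns),     # simple list becomes a single "Values" column
    }
    # A markdown string is routed to StringOutputHandler and stored as the
    # result description (converted to HTML via md_to_html when needed).
    narrative = f"The dataset contains **{len(df)}** rows and {len(df.columns)} columns."
    return tables, narrative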
validmind/tests/run.py CHANGED
@@ -76,7 +76,7 @@ def _get_run_metadata(**metadata: Dict[str, Any]) -> Dict[str, Any]:

 def _get_test_kwargs(
     test_func: callable, inputs: Dict[str, Any], params: Dict[str, Any]
-):
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """Insepect function signature to build kwargs to pass the inputs and params
     that the test function expects

@@ -93,7 +93,7 @@ def _get_test_kwargs(
         params (dict): Test parameters e.g. {"param1": 1, "param2": 2}

     Returns:
-        tuple: Tuple of input and param kwargs
+        Tuple[Dict[str, Any], Dict[str, Any]]: Tuple of input and param kwargs
     """
     input_kwargs = {}  # map function inputs (`dataset` etc) to actual objects

@@ -222,6 +222,7 @@
     params: Union[Dict[str, Any], None],
     param_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]], None],
     title: Optional[str] = None,
+    show_params: bool = True,
 ):
     """Run a comparison test i.e. a test that compares multiple outputs of a test across
     different input and/or param combinations"""
@@ -242,6 +243,7 @@
             show=False,
             generate_description=False,
             title=title,
+            show_params=show_params,
         )
         for config in run_test_configs
     ]
@@ -253,7 +255,9 @@
     else:
         test_doc = describe_test(test_id, raw=True)["Description"]

-    combined_outputs, combined_inputs, combined_params = combine_results(results)
+    combined_outputs, combined_inputs, combined_params = combine_results(
+        results, show_params
+    )

     return build_test_result(
         outputs=combined_outputs,
@@ -265,7 +269,12 @@
     )


-def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
+def _run_test(
+    test_id: TestID,
+    inputs: Dict[str, Any],
+    params: Dict[str, Any],
+    title: Optional[str] = None,
+):
     """Run a standard test and return a TestResult object"""
     test_func = load_test(test_id)
     input_kwargs, param_kwargs = _get_test_kwargs(
@@ -282,6 +291,7 @@ def _run_test(test_id: TestID, inputs: Dict[str, Any], params: Dict[str, Any]):
         test_doc=getdoc(test_func),
         inputs=input_kwargs,
         params=param_kwargs,
+        title=title,
     )


@@ -297,6 +307,7 @@ def run_test( # noqa: C901
     generate_description: bool = True,
     title: Optional[str] = None,
     post_process_fn: Union[Callable[[TestResult], None], None] = None,
+    show_params: bool = True,
     **kwargs,
 ) -> TestResult:
     """Run a ValidMind or custom test
@@ -321,6 +332,7 @@
         generate_description (bool, optional): Whether to generate a description. Defaults to True.
         title (str, optional): Custom title for the test result
         post_process_fn (Callable[[TestResult], None], optional): Function to post-process the test result
+        show_params (bool, optional): Whether to include parameter values in figure titles for comparison tests. Defaults to True.

     Returns:
         TestResult: A TestResult object containing the test results
@@ -358,6 +370,7 @@
             input_grid=input_grid,
             params=params,
             param_grid=param_grid,
+            show_params=show_params,
         )

     elif unit_metrics:
@@ -375,7 +388,7 @@
         )

     else:
-        result = _run_test(test_id, inputs, params)
+        result = _run_test(test_id, inputs, params, title)

     end_time = time.perf_counter()
     result.metadata = _get_run_metadata(duration_seconds=end_time - start_time)
@@ -383,15 +396,16 @@
     if post_process_fn:
         result = post_process_fn(result)

-    result.description = get_result_description(
-        test_id=test_id,
-        test_description=result.doc,
-        tables=result.tables,
-        figures=result.figures,
-        metric=result.metric,
-        should_generate=generate_description,
-        title=title,
-    )
+    if not result.description:
+        result.description = get_result_description(
+            test_id=test_id,
+            test_description=result.doc,
+            tables=result.tables,
+            figures=result.figures,
+            metric=result.metric,
+            should_generate=generate_description,
+            title=title,
+        )

     if show:
         result.show()
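The new show_params flag threads from run_test through _run_comparison_test into combine_results, so comparison runs can omit parameter values from generated figure titles, and a pre-set result.description (for example, one returned by the test as a string) is no longer overwritten. A hedged sketch (the test ID is from this package, but the parameter name, grid values, and input objects are illustrative):

import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.ClassImbalance",
    inputs={"dataset": vm_train_ds},
    param_grid={"min_percent_threshold": [5, 10, 20]},  # assumed parameter name
    show_params=False,  # keep the parameter values out of the figure titles
)
result.show()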
validmind/tests/test_providers.py CHANGED
@@ -7,7 +7,7 @@ import os
 import re
 import sys
 from pathlib import Path
-from typing import List, Protocol
+from typing import Any, Callable, List, Protocol

 from validmind.logging import get_logger

@@ -95,45 +95,38 @@ class LocalTestProvider:
         """
         self.root_folder = os.path.abspath(root_folder)

-    def list_tests(self):
+    def list_tests(self) -> List[str]:
         """List all tests in the given namespace

         Returns:
             list: A list of test IDs
         """
-        test_ids = []
-
+        test_files = []
         for root, _, files in os.walk(self.root_folder):
-            for filename in files:
-                if not filename.endswith(".py") or filename.startswith("__"):
-                    continue
-
-                path = Path(root) / filename
-                if not _is_test_file(path):
+            for file in files:
+                if not file.endswith(".py"):
                     continue

-                rel_path = path.relative_to(self.root_folder)
-
-                test_id_parts = [p.stem for p in rel_path.parents if p.stem][::-1]
-                test_id_parts.append(path.stem)
-                test_ids.append(".".join(test_id_parts))
+                path = Path(os.path.join(root, file))
+                if _is_test_file(path):
+                    rel_path = os.path.relpath(path, self.root_folder)
+                    test_id = os.path.splitext(rel_path)[0].replace(os.sep, ".")
+                    test_files.append(test_id)

-        return sorted(test_ids)
+        return test_files

-    def load_test(self, test_id: str):
-        """
-        Load the test identified by the given test_id.
+    def load_test(self, test_id: str) -> Callable[..., Any]:
+        """Load the test function identified by the given test_id

         Args:
-            test_id (str): The identifier of the test. This corresponds to the relative
-                path of the python file from the root folder, with slashes replaced by dots
+            test_id (str): The test ID (does not contain the namespace under which
+                the test is registered)

         Returns:
-            The test class that matches the last part of the test_id.
+            callable: The test function

         Raises:
-            LocalTestProviderLoadModuleError: If the test module cannot be imported
-            LocalTestProviderLoadTestError: If the test class cannot be found in the module
+            FileNotFoundError: If the test is not found
         """
         # Convert test_id to file path
         file_path = os.path.join(self.root_folder, f"{test_id.replace('.', '/')}.py")
@@ -162,28 +155,28 @@


 class ValidMindTestProvider:
-    """Test provider for ValidMind tests"""
+    """Provider for built-in ValidMind tests"""

-    def __init__(self):
+    def __init__(self) -> None:
         # two subproviders: unit_metrics and normal tests
-        self.metrics_provider = LocalTestProvider(
+        self.unit_metrics_provider = LocalTestProvider(
             os.path.join(os.path.dirname(__file__), "..", "unit_metrics")
         )
-        self.tests_provider = LocalTestProvider(os.path.dirname(__file__))
+        self.test_provider = LocalTestProvider(os.path.dirname(__file__))

     def list_tests(self) -> List[str]:
-        """List all tests in the ValidMind test provider"""
+        """List all tests in the given namespace"""
         metric_ids = [
-            f"unit_metrics.{test}" for test in self.metrics_provider.list_tests()
+            f"unit_metrics.{test}" for test in self.unit_metrics_provider.list_tests()
         ]
-        test_ids = self.tests_provider.list_tests()
+        test_ids = self.test_provider.list_tests()

         return metric_ids + test_ids

-    def load_test(self, test_id: str) -> callable:
-        """Load a ValidMind test or unit metric"""
+    def load_test(self, test_id: str) -> Callable[..., Any]:
+        """Load the test function identified by the given test_id"""
         return (
-            self.metrics_provider.load_test(test_id.replace("unit_metrics.", ""))
+            self.unit_metrics_provider.load_test(test_id.replace("unit_metrics.", ""))
             if test_id.startswith("unit_metrics.")
-            else self.tests_provider.load_test(test_id)
+            else self.test_provider.load_test(test_id)
         )
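The provider refactor keeps the external contract (dotted test IDs derived from .py files under the root folder) while renaming the internal sub-providers. A hedged usage sketch; the folder "my_tests", the namespace "my_org", and the test ID are placeholders, and register_test_provider is assumed to be the registration hook exposed by validmind.tests:

import validmind as vm
from validmind.tests import LocalTestProvider

provider = LocalTestProvider("my_tests")  # folder containing custom test .py files
vm.tests.register_test_provider("my_org", provider)

print(provider.list_tests())              # e.g. ["tabular.MyCustomTest", ...]
my_test = provider.load_test("tabular.MyCustomTest")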
validmind/tests/utils.py CHANGED
@@ -5,6 +5,7 @@
 """Test Module Utils"""

 import inspect
+from typing import Any, Optional, Tuple, Type, Union

 import numpy as np
 import pandas as pd
@@ -14,7 +15,7 @@ from validmind.logging import get_logger
 logger = get_logger(__name__)


-def test_description(test_class, truncate=True):
+def test_description(test_class: Type[Any], truncate: bool = True) -> str:
     description = inspect.getdoc(test_class).strip()

     if truncate and len(description.split("\n")) > 5:
@@ -23,7 +24,11 @@ def test_description(test_class, truncate=True):
     return description


-def remove_nan_pairs(y_true, y_pred, dataset_id=None):
+def remove_nan_pairs(
+    y_true: Union[np.ndarray, list],
+    y_pred: Union[np.ndarray, list],
+    dataset_id: Optional[str] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Remove pairs where either true or predicted values are NaN/None.
     Args:
@@ -52,7 +57,11 @@ def remove_nan_pairs(y_true, y_pred, dataset_id=None):
     return y_true, y_pred


-def ensure_equal_lengths(y_true, y_pred, dataset_id=None):
+def ensure_equal_lengths(
+    y_true: Union[np.ndarray, list],
+    y_pred: Union[np.ndarray, list],
+    dataset_id: Optional[str] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Check if true and predicted values have matching lengths, log warning if they don't,
     and truncate to the shorter length if necessary. Also removes any NaN/None values.
@@ -82,7 +91,11 @@ def ensure_equal_lengths(y_true, y_pred, dataset_id=None):
     return y_true, y_pred


-def validate_prediction(y_true, y_pred, dataset_id=None):
+def validate_prediction(
+    y_true: Union[np.ndarray, list],
+    y_pred: Union[np.ndarray, list],
+    dataset_id: Optional[str] = None,
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Comprehensive validation of true and predicted value pairs.
     Handles NaN/None values and length mismatches.
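These helpers gain explicit type hints but keep their behavior: dropping NaN/None pairs and truncating to a common length. A minimal sketch of calling the top-level helper (the arrays and dataset_id are illustrative):

import numpy as np
from validmind.tests.utils import validate_prediction

y_true = np.array([1.0, 0.0, 1.0, np.nan, 0.0])
y_pred = np.array([1.0, 0.0, 0.0, 1.0])  # shorter than y_true

# Logs warnings, drops the NaN pair, and truncates both arrays to the same length.
y_true_clean, y_pred_clean = validate_prediction(y_true, y_pred, dataset_id="test_ds")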
validmind/unit_metrics/__init__.py CHANGED
@@ -10,7 +10,7 @@ from validmind.tests.run import run_test
 def list_metrics(**kwargs):
     """List all metrics"""
     vm_provider = test_provider_store.get_test_provider("validmind")
-    vm_metrics_provider = vm_provider.metrics_provider
+    vm_metrics_provider = vm_provider.unit_metrics_provider

     prefix = "validmind.unit_metrics."
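This call site simply follows the attribute rename on ValidMindTestProvider. A hedged sketch of listing unit metrics through the public helper (the printed ID is an example of the expected "validmind.unit_metrics.*" prefix, not a value taken from this diff):

from validmind.unit_metrics import list_metrics

for metric_id in list_metrics():
    print(metric_id)  # e.g. "validmind.unit_metrics.classification.F1"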