validmind 2.0.7__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in the registry.
Files changed (108)
  1. validmind/__init__.py +3 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +7 -11
  4. validmind/api_client.py +29 -27
  5. validmind/client.py +10 -3
  6. validmind/datasets/credit_risk/__init__.py +11 -0
  7. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  8. validmind/datasets/credit_risk/lending_club.py +394 -0
  9. validmind/logging.py +9 -2
  10. validmind/template.py +2 -2
  11. validmind/test_suites/__init__.py +4 -2
  12. validmind/tests/__init__.py +97 -50
  13. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  14. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  15. validmind/tests/data_validation/ScatterPlot.py +8 -2
  16. validmind/tests/decorator.py +138 -14
  17. validmind/tests/model_validation/BertScore.py +1 -1
  18. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  19. validmind/tests/model_validation/BleuScore.py +1 -1
  20. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  21. validmind/tests/model_validation/ContextualRecall.py +1 -1
  22. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  23. validmind/tests/model_validation/MeteorScore.py +1 -1
  24. validmind/tests/model_validation/RegardHistogram.py +1 -1
  25. validmind/tests/model_validation/RegardScore.py +1 -1
  26. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  27. validmind/tests/model_validation/RougeMetrics.py +1 -1
  28. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  29. validmind/tests/model_validation/SelfCheckNLIScore.py +1 -1
  30. validmind/tests/model_validation/TokenDisparity.py +1 -1
  31. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  32. validmind/tests/model_validation/ToxicityScore.py +1 -1
  33. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  34. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  35. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
  36. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  37. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -18
  38. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  39. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  40. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  41. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  42. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  43. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  44. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  45. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  46. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  47. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  48. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  49. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  50. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +27 -3
  51. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  52. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  53. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  54. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  55. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  56. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  57. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  58. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  59. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  60. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  61. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  62. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  63. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  64. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  65. validmind/tests/test_providers.py +14 -124
  66. validmind/unit_metrics/__init__.py +76 -69
  67. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  68. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  69. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  70. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  71. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  72. validmind/unit_metrics/composite.py +24 -71
  73. validmind/unit_metrics/regression/GiniCoefficient.py +20 -26
  74. validmind/unit_metrics/regression/HuberLoss.py +12 -16
  75. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +18 -24
  76. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +7 -13
  77. validmind/unit_metrics/regression/MeanBiasDeviation.py +5 -14
  78. validmind/unit_metrics/regression/QuantileLoss.py +6 -16
  79. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +12 -18
  80. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +6 -15
  81. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +5 -14
  82. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +6 -15
  83. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +11 -14
  84. validmind/utils.py +18 -45
  85. validmind/vm_models/__init__.py +0 -2
  86. validmind/vm_models/dataset.py +255 -16
  87. validmind/vm_models/test/metric.py +1 -2
  88. validmind/vm_models/test/result_wrapper.py +12 -13
  89. validmind/vm_models/test/test.py +2 -1
  90. validmind/vm_models/test/threshold_test.py +1 -2
  91. validmind/vm_models/test_suite/summary.py +3 -3
  92. validmind/vm_models/test_suite/test_suite.py +2 -1
  93. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/METADATA +10 -6
  94. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/RECORD +97 -96
  95. validmind/tests/__types__.py +0 -62
  96. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  97. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  98. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  99. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  100. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -22
  101. validmind/unit_metrics/sklearn/classification/F1.py +0 -24
  102. validmind/unit_metrics/sklearn/classification/Precision.py +0 -24
  103. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -22
  104. validmind/unit_metrics/sklearn/classification/Recall.py +0 -22
  105. validmind/vm_models/test/unit_metric.py +0 -88
  106. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  107. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
  108. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/entry_points.txt +0 -0
validmind/tests/__init__.py
@@ -5,24 +5,26 @@
 """All Tests for ValidMind"""

 import importlib
+import inspect
 import sys
 from pathlib import Path
 from pprint import pformat
 from typing import Dict

+import mistune
 import pandas as pd
 from IPython.display import display
 from ipywidgets import HTML
-from markdown import markdown

 from ..errors import LoadTestError
 from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
+from ..unit_metrics import run_metric
 from ..unit_metrics.composite import load_composite_metric
-from ..utils import clean_docstring, format_dataframe, fuzzy_match, test_id_to_name
+from ..utils import format_dataframe, fuzzy_match, test_id_to_name
 from ..vm_models import TestContext, TestInput
-from .__types__ import ExternalTestProvider
-from .test_providers import GithubTestProvider, LocalTestProvider
+from .decorator import metric, tags, tasks
+from .test_providers import LocalTestProvider, TestProvider

 logger = get_logger(__name__)

@@ -35,23 +37,28 @@ __all__ = [
     "load_test",
     "describe_test",
     "register_test_provider",
-    "GithubTestProvider",
     "LoadTestError",
     "LocalTestProvider",
+    # Decorators for functional metrics
+    "metric",
+    "tags",
+    "tasks",
 ]

 __tests = None
 __test_classes = None

-__test_providers: Dict[str, ExternalTestProvider] = {}
+__test_providers: Dict[str, TestProvider] = {}
 __custom_tests: Dict[str, object] = {}


 def _test_description(test_class, truncate=True):
-    if truncate and len(test_class.__doc__.split("\n")) > 5:
-        return test_class.__doc__.strip().split("\n")[0] + "..."
+    description = inspect.getdoc(test_class).strip()

-    return test_class.__doc__
+    if truncate and len(description.split("\n")) > 5:
+        return description.strip().split("\n")[0] + "..."
+
+    return description


 def _load_tests(test_ids):
@@ -251,61 +258,83 @@ def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
     return tests


-def load_test(test_id, reload=False):  # noqa: C901
-    # Extract the test ID extension from the actual test ID when loading
-    # the test class. This enables us to generate multiple results for
-    # the same tests within the document. For instance, consider the
-    # test ID "validmind.data_validation.ClassImbalance:data_id_1,"
-    # where the test ID extension is "data_id_1".
+def _load_validmind_test(test_id, reload=False):
     parts = test_id.split(":")[0].split(".")

+    test_module = ".".join(parts[1:-1])
+    test_class = parts[-1]
+
+    error = None
+    test = None
+
+    try:
+        full_path = f"validmind.tests.{test_module}.{test_class}"
+
+        if reload and full_path in sys.modules:
+            module = importlib.reload(sys.modules[full_path])
+        else:
+            module = importlib.import_module(full_path)
+
+        test = getattr(module, test_class)
+    except ModuleNotFoundError as e:
+        error = f"Unable to load test {test_id}. {e}"
+    except AttributeError:
+        error = f"Unable to load test {test_id}. Test not in module: {test_class}"
+
+    return error, test
+
+
+def load_test(test_id: str, reload=False):
+    """Load a test by test ID
+
+    Test IDs are in the format `namespace.path_to_module.TestClassOrFuncName[:result_id]`.
+    The result ID is optional and is used to distinguish between multiple results from the
+    running the same test.
+
+    Args:
+        test_id (str): The test ID in the format `namespace.path_to_module.TestName[:result_id]`
+        reload (bool, optional): Whether to reload the test module. Defaults to False.
+    """
+    # TODO: we should use a dedicated class for test IDs to handle this consistently
+    test_id, result_id = test_id.split(":", 1) if ":" in test_id else (test_id, None)
+
     error = None
-    namespace = parts[0]
+    namespace = test_id.split(".", 1)[0]

-    if test_id.split(":")[0] in __custom_tests:
-        test = __custom_tests[test_id.split(":")[0]]
+    # TODO: lets implement an extensible loading system instead of this ugly if/else
+    if test_id in __custom_tests:
+        test = __custom_tests[test_id]

     elif test_id.startswith("validmind.composite_metric"):
-        test = load_composite_metric(test_id)
+        error, test = load_composite_metric(test_id)

     elif namespace == "validmind":
-        test_module = ".".join(parts[1:-1])
-        test_class = parts[-1]
-
-        try:
-            full_path = f"validmind.tests.{test_module}.{test_class}"
-
-            if reload and full_path in sys.modules:
-                module = importlib.reload(sys.modules[full_path])
-            else:
-                module = importlib.import_module(full_path)
-
-            test = getattr(module, test_class)
-        except ModuleNotFoundError as e:
-            error = f"Unable to load test {test_id}. {e}"
-        except AttributeError:
-            error = f"Unable to load test {test_id}. Class not in module: {test_class}"
-
-    elif namespace != "validmind" and namespace not in __test_providers:
-        error = (
-            f"Unable to load test {test_id}. "
-            f"No Test Provider found for the namespace: {namespace}."
-        )
+        error, test = _load_validmind_test(test_id, reload=reload)

     elif namespace in __test_providers:
         try:
             test = __test_providers[namespace].load_test(test_id.split(".", 1)[1])
         except Exception as e:
             error = (
-                f"Unable to load test {test_id} from test provider: "
+                f"Unable to load test {test_id} from test provider: "
                 f"{__test_providers[namespace]}\n Got Exception: {e}"
             )

+    else:
+        error = f"Unable to load test {test_id}. No test provider found."
+
     if error:
         logger.error(error)
         raise LoadTestError(error)

-    test.test_id = test_id
+    if inspect.isfunction(test):
+        # if its a function, we decorate it and then load the class
+        # TODO: simplify this as we move towards all functional metrics
+        # "_" is used here so it doesn't conflict with other test ids
+        metric("_")(test)
+        test = __custom_tests["_"]
+
+    test.test_id = f"{test_id}:{result_id}" if result_id else test_id

     return test

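The `:result_id` suffix that `load_test` now parses lets the same test be logged more than once in a document, each run tracked as its own result. A minimal usage sketch of how that might look from a notebook, assuming `vm_train_ds` and `vm_test_ds` are ValidMind dataset objects that were initialized elsewhere (the IDs and names here are illustrative, not prescribed by the release):

    from validmind.tests import run_test

    # Same underlying test, two result IDs, two independently logged results
    run_test(
        "validmind.data_validation.ClassImbalance:training_data",
        inputs={"dataset": vm_train_ds},
    )
    run_test(
        "validmind.data_validation.ClassImbalance:test_data",
        inputs={"dataset": vm_test_ds},
    )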
@@ -330,7 +359,7 @@ def describe_test(test_id: str = None, raw: bool = False):
         "Test Type": test.test_type,
         "Required Inputs": test.required_inputs,
         "Params": test.default_params or {},
-        "Description": clean_docstring(test.__doc__),
+        "Description": inspect.getdoc(test).strip() or "",
     }

     if raw:
@@ -340,7 +369,7 @@
         HTML(
             test_content_block_html.format(
                 title=f'{details["Name"]}',
-                description=markdown(details["Description"]),
+                description=mistune.html(details["Description"].strip()),
                 required_inputs=", ".join(details["Required Inputs"] or ["None"]),
                 params_table="\n".join(
                     [
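Test descriptions in this file are now read with `inspect.getdoc` and rendered to HTML via `mistune` rather than the `markdown` package. A quick sketch of inspecting a built-in test (the test ID is just an example):

    from validmind.tests import describe_test

    describe_test("validmind.data_validation.ClassImbalance")  # displays an HTML widget
    details = describe_test("validmind.data_validation.ClassImbalance", raw=True)  # plain dict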
@@ -361,6 +390,7 @@ def run_test(
     params: dict = None,
     inputs=None,
     output_template=None,
+    show=True,
     **kwargs,
 ):
     """Run a test by test ID
@@ -375,6 +405,7 @@
         params (dict, optional): A dictionary of params to override the default params
         inputs: A dictionary of test inputs to pass to the Test
         output_template (str, optional): A template to use for customizing the output
+        show (bool, optional): Whether to display the results. Defaults to True.
         **kwargs: Any extra arguments will be passed in via the TestInput object. i.e.:
             - dataset: A validmind Dataset object or a Pandas DataFrame
             - model: A model to use for the test
@@ -389,9 +420,23 @@
     if (unit_metrics and not name) or (name and not unit_metrics):
         raise ValueError("`name` and `unit_metrics` must be provided together")

+    if test_id and test_id.startswith("validmind.unit_metrics"):
+        # TODO: as we move towards a more unified approach to metrics
+        # we will want to make everything functional and remove the
+        # separation between unit metrics and "normal" metrics
+        return run_metric(test_id, inputs=inputs, params=params, show=show)
+
     if unit_metrics:
-        TestClass = load_composite_metric(unit_metrics=unit_metrics, metric_name=name)
-        test_id = f"validmind.composite_metric.{name}"
+        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
+        test_id = f"validmind.composite_metric.{metric_id_name}"
+
+        error, TestClass = load_composite_metric(
+            unit_metrics=unit_metrics, metric_name=metric_id_name
+        )
+
+        if error:
+            raise LoadTestError(error)
+
     else:
         TestClass = load_test(test_id, reload=True)

@@ -404,17 +449,19 @@
     )

     test.run()
-    test.result.show()
+
+    if show:
+        test.result.show()

     return test.result


-def register_test_provider(namespace: str, test_provider: ExternalTestProvider) -> None:
+def register_test_provider(namespace: str, test_provider: TestProvider) -> None:
     """Register an external test provider

     Args:
         namespace (str): The namespace of the test provider
-        test_provider (ExternalTestProvider): The test provider
+        test_provider (TestProvider): The test provider
     """
     __test_providers[namespace] = test_provider

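With these changes, `run_test` also routes `validmind.unit_metrics.*` IDs through `run_metric` and can compose several unit metrics into one composite metric result, while `show=False` suppresses the inline display. A rough sketch, assuming `vm_model` and `vm_test_ds` are already-initialized ValidMind objects and that the unit-metric IDs follow the relocated module paths in the file list above:

    from validmind.tests import run_test

    # Run a single unit metric (dispatched to run_metric under the hood)
    f1 = run_test(
        "validmind.unit_metrics.classification.sklearn.F1",
        inputs={"model": vm_model, "dataset": vm_test_ds},
        show=False,
    )

    # Compose unit metrics into one composite metric; the name is camel-cased
    # into the test ID validmind.composite_metric.ClassificationPerformance
    perf = run_test(
        name="classification performance",
        unit_metrics=[
            "validmind.unit_metrics.classification.sklearn.Accuracy",
            "validmind.unit_metrics.classification.sklearn.Precision",
            "validmind.unit_metrics.classification.sklearn.Recall",
        ],
        inputs={"model": vm_model, "dataset": vm_test_ds},
    )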
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py
@@ -74,7 +74,9 @@ class FeatureTargetCorrelationPlot(Metric):

     def visualize_feature_target_correlation(self, df, target_column, fig_height):
         # Compute correlations with the target variable
-        correlations = df.corr(numeric_only=True)[target_column].drop(target_column)
+        correlations = (
+            df.corr(numeric_only=True)[target_column].drop(target_column).to_frame()
+        )
         correlations = correlations.loc[:, ~correlations.columns.duplicated()]

         correlations = correlations.sort_values(by=target_column, ascending=True)
validmind/tests/data_validation/PiTCreditScoresHistogram.py
@@ -113,7 +113,7 @@ class PiTCreditScoresHistogram(Metric):
         )
         predicted_default_column = (
             self.params.get("predicted_default_column")
-            or self.inputs.dataset.y_pred(self.inputs.model.input_id),
+            or self.inputs.dataset.y_pred(self.inputs.model),
         )
         scores_column = self.params["scores_column"]
         point_in_time_column = self.params["point_in_time_column"]
validmind/tests/data_validation/ScatterPlot.py
@@ -65,8 +65,14 @@ class ScatterPlot(Metric):
         if not set(columns).issubset(set(df.columns)):
             raise ValueError("Provided 'columns' must exist in the dataset")

-        sns.pairplot(data=df, diag_kind="kde")
-
+        g = sns.pairplot(data=df, diag_kind="kde")
+        for ax in g.axes.flatten():
+            # rotate x axis labels
+            ax.set_xlabel(ax.get_xlabel(), rotation=45)
+            # rotate y axis labels
+            ax.set_ylabel(ax.get_ylabel(), rotation=45)
+            # set y labels alignment
+            ax.yaxis.get_label().set_horizontalalignment("right")
         # Get the current figure
         fig = plt.gcf()

validmind/tests/decorator.py
@@ -4,13 +4,17 @@

 """Decorators for creating and registering metrics with the ValidMind framework."""

+# TODO: as we move entirely to a functional approach a lot of this logic
+# should be moved into the __init__ to replace the old class-based stuff
+
 import inspect
+import os
 from uuid import uuid4

 import pandas as pd

+from validmind.errors import MissingRequiredTestInputError
 from validmind.logging import get_logger
-from validmind.utils import clean_docstring
 from validmind.vm_models import (
     Metric,
     MetricResult,
@@ -26,8 +30,6 @@ from validmind.vm_models.figure import (
 )
 from validmind.vm_models.test.result_wrapper import MetricResultWrapper

-from . import _register_custom_test
-
 logger = get_logger(__name__)


@@ -53,7 +55,7 @@ def _inspect_signature(test_func: callable):
     return inputs, params


-def _build_result(results, test_id, description, output_template):
+def _build_result(results, test_id, description, output_template, inputs):  # noqa: C901
     ref_id = str(uuid4())
     figure_metadata = {
         "_type": "metric",
@@ -65,7 +67,17 @@ def _build_result(results, test_id, description, output_template):
     figures = []

     def process_item(item):
-        if is_matplotlib_figure(item) or is_plotly_figure(item) or is_png_image(item):
+        # TOOD: build out a more robust/extensible system for this
+        # TODO: custom type handlers would be really cool
+
+        # unit metrics (scalar values) - show in a simple table for now
+        if isinstance(item, int) or isinstance(item, float) or isinstance(item, str):
+            tables.append(ResultTable(data=[{test_id.split(".")[-1]: item}]))
+
+        # plots
+        elif isinstance(item, Figure):
+            figures.append(item)
+        elif is_matplotlib_figure(item) or is_plotly_figure(item) or is_png_image(item):
             figures.append(
                 Figure(
                     key=f"{test_id}:{len(figures) + 1}",
@@ -73,18 +85,24 @@
                     metadata=figure_metadata,
                 )
             )
-        elif isinstance(item, list):
-            tables.append(ResultTable(data=item))
-        elif isinstance(item, pd.DataFrame):
+
+        # tables
+        elif isinstance(item, list) or isinstance(item, pd.DataFrame):
             tables.append(ResultTable(data=item))
         elif isinstance(item, dict):
             for table_name, table in item.items():
+                if not isinstance(table, list) and not isinstance(table, pd.DataFrame):
+                    raise ValueError(
+                        f"Invalid table format: {table_name} must be a list or DataFrame"
+                    )
+
                 tables.append(
                     ResultTable(
                         data=table,
                         metadata=ResultTableMetadata(title=table_name),
                     )
                 )
+
         else:
             raise ValueError(f"Invalid return type: {type(item)}")
@@ -107,17 +125,23 @@
         result_metadata=[
             {
                 "content_id": f"metric_description:{test_id}",
-                "text": clean_docstring(description),
+                "text": description,
             }
         ],
-        inputs=[],
+        inputs=inputs,
         output_template=output_template,
     )


-def get_run_method(func, inputs, params):
+def _get_run_method(func, inputs, params):
     def run(self: Metric):
-        input_kwargs = {k: getattr(self.inputs, k) for k in inputs.keys()}
+        input_kwargs = {}
+        for k in inputs.keys():
+            try:
+                input_kwargs[k] = getattr(self.inputs, k)
+            except AttributeError:
+                raise MissingRequiredTestInputError(f"Missing required input: {k}.")
+
         param_kwargs = {
             k: self.params.get(k, params[k]["default"]) for k in params.keys()
         }
@@ -127,8 +151,9 @@ def get_run_method(func, inputs, params):
         self.result = _build_result(
             results=raw_results,
             test_id=self.test_id,
-            description=self.__doc__,
+            description=inspect.getdoc(self),
             output_template=self.output_template,
+            inputs=list(inputs.keys()),
         )

         return self.result
@@ -136,6 +161,65 @@ def get_run_method(func, inputs, params):
     return run


+def _get_save_func(func, test_id):
+    def save(root_folder=".", imports=None):
+        parts = test_id.split(".")
+
+        if len(parts) > 1:
+            path = os.path.join(root_folder, *parts[1:-1])
+            test_name = parts[-1]
+            new_test_id = f"<test_provider_namespace>.{'.'.join(parts[1:])}"
+        else:
+            path = root_folder
+            test_name = parts[0]
+            new_test_id = f"<test_provider_namespace>.{test_name}"
+
+        if not os.path.exists(path):
+            os.makedirs(path, exist_ok=True)
+
+        full_path = os.path.join(path, f"{test_name}.py")
+
+        source = inspect.getsource(func)
+        # remove decorator line
+        source = source.split("\n", 1)[1]
+        if imports:
+            imports = "\n".join(imports)
+            source = f"{imports}\n\n\n{source}"
+        # add comment to the top of the file
+        source = f"""
+# Saved from {func.__module__}.{func.__name__}
+# Original Test ID: {test_id}
+# New Test ID: {new_test_id}
+
+{source}
+"""
+
+        # ensure that the function name matches the test name
+        source = source.replace(f"def {func.__name__}", f"def {test_name}")
+
+        # use black to format the code
+        try:
+            import black
+
+            source = black.format_str(source, mode=black.FileMode())
+        except ImportError:
+            # ignore if not available
+            pass
+
+        with open(full_path, "w") as file:
+            file.writelines(source)
+
+        logger.info(
+            f"Saved to {os.path.abspath(full_path)}!"
+            "Be sure to add any necessary imports to the top of the file."
+        )
+        logger.info(
+            f"This metric can be run with the ID: {new_test_id}",
+        )
+
+    return save
+
+
 def metric(func_or_id):
     """Decorator for creating and registering metrics with the ValidMind framework.

@@ -151,6 +235,7 @@

     - Table: Either a list of dictionaries or a pandas DataFrame
     - Plot: Either a matplotlib figure or a plotly figure
+    - Scalar: A single number or string

     The function may also include a docstring. This docstring will be used and logged
     as the metric's description.
@@ -163,27 +248,66 @@
         The decorated function.
     """

+    from . import _register_custom_test
+
     def decorator(func):
         test_id = func_or_id or f"validmind.custom_metrics.{func.__name__}"

         inputs, params = _inspect_signature(func)
         description = inspect.getdoc(func)
+        tasks = getattr(func, "__tasks__", [])
+        tags = getattr(func, "__tags__", [])

         metric_class = type(
             func.__name__,
             (Metric,),
             {
-                "run": get_run_method(func, inputs, params),
+                "run": _get_run_method(func, inputs, params),
                 "required_inputs": list(inputs.keys()),
                 "default_parameters": params,
                 "__doc__": description,
+                "metadata": {
+                    "task_types": tasks,
+                    "tags": tags,
+                },
             },
         )
         _register_custom_test(test_id, metric_class)

+        # special function to allow the function to be saved to a file
+        func.save = _get_save_func(func, test_id)
+
         return func

     if callable(func_or_id):
         return decorator(func_or_id)

     return decorator
+
+
+def tasks(*tasks):
+    """Decorator for specifying the task types that a metric is designed for.
+
+    Args:
+        *tasks: The task types that the metric is designed for.
+    """
+
+    def decorator(func):
+        func.__tasks__ = list(tasks)
+        return func
+
+    return decorator
+
+
+def tags(*tags):
+    """Decorator for specifying tags for a metric.
+
+    Args:
+        *tags: The tags to apply to the metric.
+    """
+
+    def decorator(func):
+        func.__tags__ = list(tags)
+        return func
+
+    return decorator
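Taken together, the decorator changes add `@tags`/`@tasks` metadata, scalar return values, clearer input validation (a missing input now raises `MissingRequiredTestInputError`), and a `.save()` helper for exporting a functional metric to a test-provider folder. A minimal sketch of the workflow under the assumptions that `vm_dataset` is an initialized ValidMind dataset exposing a `df` DataFrame and that `my_tests.MissingValueRate` is a made-up test ID:

    from validmind.tests import metric, run_test, tags, tasks

    @metric("my_tests.MissingValueRate")
    @tags("tabular_data", "data_quality")
    @tasks("classification", "regression")
    def MissingValueRate(dataset):
        """Fraction of missing values across the entire dataset."""
        df = dataset.df
        return float(df.isna().sum().sum() / df.size)  # scalar -> one-cell result table

    result = run_test("my_tests.MissingValueRate", inputs={"dataset": vm_dataset})

    # Write the function out as a standalone file for a local test provider;
    # an imports=[...] list can be passed for any modules the saved file needs.
    MissingValueRate.save("./my_tests")

Note that `@tags` and `@tasks` sit below `@metric` so their attributes are set before `@metric` reads them.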
validmind/tests/model_validation/BertScore.py
@@ -57,7 +57,7 @@ class BertScore(Metric):

     def run(self):
         y_true = list(itertools.chain.from_iterable(self.inputs.dataset.y))
-        y_pred = self.inputs.dataset.y_pred(self.inputs.model.input_id)
+        y_pred = self.inputs.dataset.y_pred(self.inputs.model)

         # Load the bert evaluation metric
         bert = evaluate.load("bertscore")
validmind/tests/model_validation/BertScoreAggregate.py
@@ -50,7 +50,7 @@ class BertScoreAggregate(Metric):

     def run(self):
         y_true = list(itertools.chain.from_iterable(self.inputs.dataset.y))
-        y_pred = self.inputs.dataset.y_pred(self.inputs.model.input_id)
+        y_pred = self.inputs.dataset.y_pred(self.inputs.model)

         bert = evaluate.load("bertscore")
         bert_s = bert.compute(predictions=y_pred, references=y_true, lang="en")
validmind/tests/model_validation/BleuScore.py
@@ -55,7 +55,7 @@ class BleuScore(Metric):

         # Compute the BLEU score
         bleu = bleu.compute(
-            predictions=self.inputs.dataset.y_pred(self.inputs.model.input_id),
+            predictions=self.inputs.dataset.y_pred(self.inputs.model),
             references=self.inputs.dataset.y,
         )
         return self.cache_results(metric_value={"blue_score_metric": bleu})
validmind/tests/model_validation/ClusterSizeDistribution.py
@@ -61,7 +61,7 @@ class ClusterSizeDistribution(Metric):

     def run(self):
         y_true_train = self.inputs.dataset.y
-        y_pred_train = self.inputs.dataset.y_pred(self.inputs.model.input_id)
+        y_pred_train = self.inputs.dataset.y_pred(self.inputs.model)
         y_true_train = y_true_train.astype(y_pred_train.dtype)
         df = pd.DataFrame(
             {"Actual": y_true_train.ravel(), "Prediction": y_pred_train.ravel()}
validmind/tests/model_validation/ContextualRecall.py
@@ -66,7 +66,7 @@ class ContextualRecall(Metric):

     def run(self):
         y_true = list(itertools.chain.from_iterable(self.inputs.dataset.y))
-        y_pred = self.inputs.dataset.y_pred(self.inputs.model.input_id)
+        y_pred = self.inputs.dataset.y_pred(self.inputs.model)

         score_list = []
         for y_t, y_p in zip(y_true, y_pred):
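The hunks in this last group are the same one-line migration repeated across tests: `dataset.y_pred()` is now keyed by the model object itself rather than by its `input_id` string. Roughly, for code that calls the accessor directly (the `vm_dataset` and `vm_model` names below are placeholders for already-initialized ValidMind objects):

    y_pred = vm_dataset.y_pred(vm_model.input_id)  # 2.0.7 style
    y_pred = vm_dataset.y_pred(vm_model)           # 2.1.0 style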