validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that registry.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +80 -119
- validmind/ai/test_result_description/config.yaml +29 -0
- validmind/ai/test_result_description/context.py +73 -0
- validmind/ai/test_result_description/image_processing.py +124 -0
- validmind/ai/test_result_description/system.jinja +39 -0
- validmind/ai/test_result_description/user.jinja +25 -0
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/credit_risk/__init__.py +1 -0
- validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club_bias.py +142 -0
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +14 -15
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/JarqueBera.py +70 -0
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LJungBox.py +66 -0
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
- validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/RunsTest.py +72 -0
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +42 -40
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +39 -36
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +38 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/utils.py +4 -0
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/metric.py +1 -0
- validmind/vm_models/test/result_wrapper.py +143 -158
- validmind/vm_models/test/threshold_test.py +1 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
- validmind-2.5.18.dist-info/RECORD +324 -0
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
- validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
- validmind-2.5.8.dist-info/RECORD +0 -318
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
validmind/tests/run.py
CHANGED
```diff
@@ -17,6 +17,7 @@ from validmind.vm_models import (
     MetricResult,
     ResultSummary,
     ResultTable,
+    ResultTableMetadata,
     TestContext,
     TestInput,
     ThresholdTestResults,
@@ -147,6 +148,26 @@ def _combine_figures(figure_lists: List[List[Any]], input_groups: List[Dict[str,
     return [figure for figures in figure_lists for figure in figures]
 
 
+def _combine_unit_metrics(results: List[MetricResultWrapper]):
+    if not results[0].scalar:
+        return
+
+    for result in results:
+        table = ResultTable(
+            data=[{"value": result.scalar}],
+            metadata=ResultTableMetadata(title="Unit Metrics"),
+        )
+        if not result.metric:
+            result.metric = MetricResult(
+                ref_id="will_be_overwritten",
+                key=result.result_id,
+                value=result.scalar,
+                summary=ResultSummary(results=[table]),
+            )
+        else:
+            result.metric.summary.results.append(table)
+
+
 def metric_comparison(
     results: List[MetricResultWrapper],
     test_id: TestID,
@@ -172,22 +193,41 @@ def metric_comparison(
             raise ValueError(f"Unsupported type for value: {v}")
         input_group_strings.append(new_group)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # handle unit metrics (scalar values) by adding it to the summary
+    _combine_unit_metrics(results)
+
+    # Check if the results list contains a result object with a metric
+    if any(
+        hasattr(result, "metric")
+        and hasattr(result.metric, "summary")
+        and result.metric.summary
+        for result in results
+    ):
+        # Compute merged summaries only if there is a result with a metric
+        merged_summary = _combine_summaries(
+            [
+                {"inputs": input_group_strings[i], "summary": result.metric.summary}
+                for i, result in enumerate(results)
+            ]
+        )
+    else:
+        merged_summary = None
+
+    # Check if the results list contains a result object with figures
+    if any(hasattr(result, "figures") and result.figures for result in results):
+        # Compute merged figures only if there is at least one result with figures
+        merged_figures = _combine_figures(
+            [result.figures for result in results],
+            input_groups,
+        )
+        # Patch figure metadata so they are connected to the comparison result
+        if merged_figures and len(merged_figures):
+            for i, figure in enumerate(merged_figures):
+                figure.key = f"{figure.key}-{i}"
+                figure.metadata["_name"] = test_id
+                figure.metadata["_ref_id"] = ref_id
+    else:
+        merged_figures = None
 
     return MetricResultWrapper(
         result_id=test_id,
@@ -196,7 +236,7 @@ def metric_comparison(
             test_id=test_id,
             default_description=f"Comparison test result for {test_id}",
             summary=merged_summary.serialize() if merged_summary else None,
-            figures=merged_figures,
+            figures=merged_figures if merged_figures else None,
             should_generate=generate_description,
         ),
     ],
@@ -294,6 +334,8 @@ def threshold_test_comparison(
 def run_comparison_test(
     test_id: TestID,
     input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]],
+    name: str = None,
+    unit_metrics: List[TestID] = None,
     params: Dict[str, Any] = None,
     show: bool = True,
     output_template: str = None,
@@ -308,6 +350,8 @@ def run_comparison_test(
     results = [
         run_test(
             test_id,
+            name=name,
+            unit_metrics=unit_metrics,
            inputs=inputs,
             show=False,
             params=params,
@@ -387,33 +431,34 @@ def run_test(
             "When providing an `input_grid`, you cannot also provide `inputs` or `kwargs`"
         )
 
+    if unit_metrics:
+        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
+        test_id = f"validmind.composite_metric.{metric_id_name}" or test_id
+
     if input_grid:
         return run_comparison_test(
             test_id,
             input_grid,
+            name=name,
+            unit_metrics=unit_metrics,
             params=params,
             output_template=output_template,
             show=show,
             generate_description=__generate_description,
         )
 
-    if test_id
+    if test_id.startswith("validmind.unit_metrics"):
         # TODO: as we move towards a more unified approach to metrics
         # we will want to make everything functional and remove the
         # separation between unit metrics and "normal" metrics
         return run_metric(test_id, inputs=inputs, params=params, show=show)
 
     if unit_metrics:
-        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
-        test_id = f"validmind.composite_metric.{metric_id_name}"
-
         error, TestClass = load_composite_metric(
             unit_metrics=unit_metrics, metric_name=metric_id_name
         )
-
         if error:
             raise LoadTestError(error)
-
     else:
         TestClass = load_test(test_id, reload=True)
 
```
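In practice, these run.py changes mean a named set of unit metrics can be run, and compared across an input grid, through the single `run_test` entry point, with the composite id derived from `name`. A usage sketch, not part of the diff: the input ids `"model"`, `"train_dataset"`, and `"test_dataset"` are placeholders, and omitting `test_id` assumes it defaults to `None`:

```python
from validmind.tests import run_test

# "Model Performance" is camel-cased into the derived id
# "validmind.composite_metric.ModelPerformance"
result = run_test(
    name="Model Performance",
    unit_metrics=[
        "validmind.unit_metrics.classification.F1",
        "validmind.unit_metrics.classification.Precision",
        "validmind.unit_metrics.classification.Recall",
    ],
    inputs={"model": "model", "dataset": "test_dataset"},
)
result.log()

# The same composite metric fanned out over two datasets; run_comparison_test
# now forwards name/unit_metrics to each underlying run_test call
comparison = run_test(
    name="Model Performance",
    unit_metrics=["validmind.unit_metrics.classification.F1"],
    input_grid={"model": ["model"], "dataset": ["train_dataset", "test_dataset"]},
)
```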
validmind/unit_metrics/__init__.py
CHANGED
```diff
@@ -2,145 +2,111 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+import glob
 import hashlib
 import json
+import os
 from importlib import import_module
+from textwrap import dedent
+
+from IPython.display import Markdown, display
 
 from validmind.input_registry import input_registry
 from validmind.tests.decorator import _build_result, _inspect_signature
-from validmind.utils import
+from validmind.utils import test_id_to_name
 
 unit_metric_results_cache = {}
 
 
-def
-
-
-
-    then creates a SHA-256 hash of the string to ensure a unique identifier
-    for the parameters. If params is None, a default hash is returned.
-
-    Args:
-        params (dict or None): The parameters to be serialized.
-
-    Returns:
-        str: A SHA-256 hash of the JSON string representation of the params,
-             or a default hash if params is None.
-    """
-    if params is None:
-        # Handle None by returning a hash of an empty dictionary or a predefined value
-        params_json = json.dumps({})
-    else:
-        params_json = json.dumps(params, sort_keys=True)
-
-    hash_object = hashlib.sha256(params_json.encode())
-    return hash_object.hexdigest()
-
-
-def _serialize_model(model):
-    """
-    Generate a SHA-256 hash for a scikit-learn model based on its type and parameters.
-
-    Args:
-        model VMModel: The model to be serialized.
-
-    Returns:
-        str: A SHA-256 hash of the model's description.
-    """
-
-    model_info = get_model_info(model)
+def _serialize_dataset(dataset, model=None, sample_size=1000):
+    columns = [*dataset.feature_columns, dataset.target_column]
+    if model:
+        columns.append(dataset.prediction_column(model))
 
-
+    df = dataset._df[columns]
 
-
-
-
+    return hashlib.md5(
+        df.sample(n=min(sample_size, df.shape[0]), random_state=42)
+        .to_string(header=True, index=True)
+        .encode()
+    ).hexdigest()
 
 
-def
-
-
+def _get_metric_cache_key(metric_id, inputs, params):
+    cache_elements = [
+        metric_id,
+        hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest(),
+    ]
 
-
-
-    and directly incorporates the model ID and prediction column name to ensure uniqueness.
+    if "model" in inputs:
+        cache_elements.append(inputs["model"].input_id)
 
-
-    dataset
-
-
-
-    Returns:
-        str: MD5 hash of the dataset
+    if "dataset" in inputs:
+        cache_elements.append(inputs["dataset"].input_id)
+        cache_elements.append(
+            _serialize_dataset(inputs["dataset"], inputs.get("model"))
+        )
 
-
-    Including the model ID and prediction column name in the hash calculation ensures uniqueness,
-    especially in cases where the predictions are sparse or the dataset has not significantly changed.
-    This approach guarantees that the hash will distinguish between model-generated predictions
-    and pre-computed prediction columns, addressing potential hash collisions.
-    """
-    return _fast_hash(
-        dataset._df[
-            [
-                *dataset.feature_columns,
-                dataset.target_column,
-                dataset.prediction_column(model),
-            ]
-        ]
-    )
+    return hashlib.md5("_".join(cache_elements).encode()).hexdigest()
 
 
-def
-    """
-    Generates a fast hash by sampling, converting to string and md5 hashing.
+def describe_metric(metric_id, raw=False):
+    """Describe a metric
 
     Args:
-
-
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
+        raw (bool): Whether to return the description as a dictionary
 
     Returns:
-
+        dict: A dictionary containing the metric description
     """
-
-
-    return hashlib.md5(
-        df_sample.to_string(header=True, index=True).encode()
-    ).hexdigest()
+    metric = load_metric(metric_id)
+    inputs, params = _inspect_signature(metric)
 
+    if raw:
+        return {
+            "id": metric_id,
+            "description": metric.__doc__,
+            "inputs": inputs,
+            "params": params,
+        }
 
-
-
+    inputs = ", ".join(inputs.keys())
+    params = ", ".join(params.keys())
+    description_md = f"""
+    ### {test_id_to_name(metric_id)} (*'{metric_id}'*)
 
-
-    serialized_params = _serialize_params(params) if params else "None"
-    cache_elements.append(serialized_params)
+    {metric.__doc__ or ""}
 
-
-    if not isinstance(inputs, dict):
-        raise TypeError("Expected 'inputs' to be a dictionary.")
+    **Inputs**: {inputs}
 
-
-
-
-
-    dataset = inputs["dataset"]
-    model = inputs["model"]
+    **Parameters**: {params}
+    """
+    display(Markdown(dedent(description_md)))
 
-    cache_elements.append(_serialize_dataset(dataset, model))
 
-
+def list_metrics():
+    """List all available metrics
 
-
-
-
-
+    Returns:
+        list: A list of metric ids
+    """
+    # current directory of this file is the __init__.py file in the validmind/unit_metrics directory
+    # glob for all metrics in the unit_metrics directory (indicated by capitalized python files)
+    # recursive since we want to include subdirectories
+    curr_dir = os.path.dirname(os.path.realpath(__file__))
+    return [
+        f"{__name__}.{os.path.relpath(metric, curr_dir).replace('/', '.')[:-3]}"
+        for metric in glob.glob(f"{curr_dir}/**/*.py", recursive=True)
+        if os.path.isfile(metric) and os.path.basename(metric)[0].isupper()
+    ]
 
 
 def load_metric(metric_id):
     """Load a metric class from a string
 
     Args:
-        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
 
     Returns:
         callable: The metric function
@@ -152,7 +118,7 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
     """Run a single metric and cache the results
 
     Args:
-        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
         inputs (dict): A dictionary of the metric inputs
         params (dict): A dictionary of the metric parameters
         show (bool): Whether to display the results
@@ -164,7 +130,7 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
     }
     params = params or {}
 
-    cache_key =
+    cache_key = _get_metric_cache_key(metric_id, inputs, params)
 
     if cache_key not in unit_metric_results_cache:
         metric = load_metric(metric_id)
@@ -182,53 +148,24 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
             result,
             # store the input ids that were used to calculate the result
             [v.input_id for v in inputs.values()],
+            # store the params that were used to calculate the result
+            params,
         )
 
-
+    cached_result = unit_metric_results_cache[cache_key]
 
     if value_only:
-        return
-
-
-
-    <thead>
-        <tr>
-            <th>Metric</th>
-            <th>Value</th>
-        </tr>
-    </thead>
-    <tbody>
-        <tr>
-            <td><strong>{test_id_to_name(metric_id)}</strong></td>
-            <td>{value:.4f}</td>
-        </tr>
-    </tbody>
-</table>
-<style>
-    th, td {{
-        padding: 5px;
-        text-align: left;
-    }}
-</style>
-"""
-    result = _build_result(
-        results=value,
+        return cached_result[0]
+
+    result_wrapper = _build_result(
+        results=cached_result[0],
         test_id=metric_id,
-
-
-
+        inputs=cached_result[1],
+        params=cached_result[2],
+        generate_description=False,
     )
 
-    # in case the user tries to log the result object
-    def log():
-        raise Exception(
-            "Cannot log unit metrics directly..."
-            "You can run this unit metric as part of a composite metric and log that"
-        )
-
-    result.log = log
-
     if show:
-
+        result_wrapper.show()
 
-    return
+    return result_wrapper
```
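The rewritten module replaces the old SHA-256 serialization helpers with a leaner cache key (metric id, a params hash, input ids, and a sampled MD5 digest of the relevant dataset columns) and gains discovery helpers. A sketch of the resulting surface, assuming `vm_model` and `vm_test_ds` are previously initialized ValidMind model and dataset inputs:

```python
from validmind.unit_metrics import describe_metric, list_metrics, run_metric

# Discover available unit metrics (capitalized .py files in the package)
metrics = list_metrics()
# e.g. ['validmind.unit_metrics.classification.F1', ...]

describe_metric("validmind.unit_metrics.classification.F1")  # renders Markdown

# A second call with identical inputs/params is served from the in-memory
# cache, keyed on the metric id, params, input ids, and dataset digest
f1 = run_metric(
    "validmind.unit_metrics.classification.F1",
    inputs={"model": vm_model, "dataset": vm_test_ds},
    value_only=True,  # return the raw float instead of a result wrapper
)
```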
validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py}
CHANGED
```diff
@@ -7,8 +7,8 @@ from sklearn.metrics import accuracy_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Accuracy(dataset, model):
     """Calculates the accuracy of a model"""
     return accuracy_score(dataset.y, dataset.y_pred(model))
```
validmind/unit_metrics/classification/{sklearn/F1.py → F1.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import f1_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def F1(model, dataset, **kwargs):
     return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import precision_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Precision(model, dataset, **kwargs):
     return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py}
CHANGED
```diff
@@ -9,10 +9,9 @@ from sklearn.preprocessing import LabelBinarizer
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def ROC_AUC(model, dataset, **kwargs):
-
     y_true = dataset.y
 
     if len(unique(y_true)) > 2:
```
validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import recall_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Recall(model, dataset, **kwargs):
     return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import mean_absolute_error as _mean_absolute_error
 from validmind import tags, tasks
 
 
-@tags("regression"
+@tags("regression")
 @tasks("regression")
 def MeanAbsoluteError(model, dataset, **kwargs):
     return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import mean_squared_error
 from validmind import tags, tasks
 
 
-@tags("regression"
+@tags("regression")
 @tasks("regression")
 def MeanSquaredError(model, dataset, **kwargs):
     return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
```
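All of these unit metrics now carry a single task-aligned tag (the "sklearn" and "unit_metric" tags are dropped along with the sklearn/ subdirectory) and share the same functional shape, so a custom unit metric follows the same pattern. A sketch, not part of the package:

```python
from sklearn.metrics import median_absolute_error

from validmind import tags, tasks


@tasks("regression")
@tags("regression")
def MedianAbsoluteError(model, dataset, **kwargs):
    """Calculates the median absolute error of a model"""
    return median_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
```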
validmind/utils.py
CHANGED
```diff
@@ -175,6 +175,10 @@ def format_records(df):
             continue
         not_zero = df[col][df[col] != 0]
         min_number = not_zero.min()
+        if math.isnan(min_number) or math.isinf(min_number):
+            df[col] = df[col].round(DEFAULT_SMALL_NUMBER_DECIMALS)
+            continue
+
         _, min_scale = precision_and_scale(min_number)
 
         if min_number >= 10:
```
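The new guard protects `format_records` against columns whose smallest non-zero value is not finite: an all-zero (or all-NaN) column leaves `not_zero` empty, its `min()` is NaN, and `precision_and_scale` would previously receive that NaN. A minimal standalone repro of the edge case, outside the package:

```python
import math

import pandas as pd

col = pd.Series([0, 0, 0])
not_zero = col[col != 0]        # empty series for an all-zero column
min_number = not_zero.min()     # nan
print(math.isnan(min_number))   # True -> fall back to default rounding
```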
validmind/vm_models/dataset/dataset.py
CHANGED
```diff
@@ -323,6 +323,7 @@ class VMDataset(VMInput):
 
         if column_name and column_name in self.feature_columns:
             self.feature_columns.remove(column_name)
+            self._set_feature_columns(self.feature_columns)
 
         return self.extra_columns.prediction_column(model, column_name)
 
@@ -333,6 +334,7 @@ class VMDataset(VMInput):
 
         if column_name and column_name in self.feature_columns:
             self.feature_columns.remove(column_name)
+            self._set_feature_columns(self.feature_columns)
 
         return self.extra_columns.probability_column(model, column_name)
 
```
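Both `prediction_column` and `probability_column` already removed a linked column from `feature_columns`; the added `_set_feature_columns` call re-runs the derived column bookkeeping (presumably the numeric/categorical splits) so it stays in sync. A hypothetical flow, assuming the raw DataFrame already contains a "pred" column:

```python
# Link an existing column as the model's predictions; "pred" is dropped from
# feature_columns and, with this fix, the derived column metadata is refreshed
vm_ds.prediction_column(vm_model, "pred")

assert "pred" not in vm_ds.feature_columns
```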
validmind/vm_models/figure.py
CHANGED
```diff
@@ -157,6 +157,11 @@ class Figure:
 
             return f"data:image/png;base64,{b64_data}"
 
+        elif is_png_image(self.figure):
+            b64_data = base64.b64encode(self.figure).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
         raise UnsupportedFigureError(
             f"Unrecognized figure type: {get_full_typename(self.figure)}"
         )
```
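With this branch, the serializer also accepts raw PNG bytes. `is_png_image` itself is not included in the hunk; a plausible stand-in (an assumption, not the actual validmind helper) just checks for the fixed 8-byte PNG signature:

```python
PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"


def is_png_image(figure) -> bool:
    # Raw PNG data always begins with the fixed 8-byte PNG signature
    return isinstance(figure, bytes) and figure.startswith(PNG_SIGNATURE)
```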