validmind 2.1.0__py3-none-any.whl → 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. validmind/__version__.py +1 -1
  2. validmind/ai.py +3 -3
  3. validmind/api_client.py +2 -3
  4. validmind/client.py +68 -25
  5. validmind/datasets/llm/rag/__init__.py +11 -0
  6. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +30 -0
  7. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +30 -0
  8. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +53 -0
  9. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +53 -0
  10. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +53 -0
  11. validmind/datasets/llm/rag/rfp.py +41 -0
  12. validmind/html_templates/__init__.py +0 -0
  13. validmind/html_templates/content_blocks.py +89 -14
  14. validmind/models/__init__.py +7 -4
  15. validmind/models/foundation.py +8 -34
  16. validmind/models/function.py +51 -0
  17. validmind/models/huggingface.py +16 -46
  18. validmind/models/metadata.py +42 -0
  19. validmind/models/pipeline.py +66 -0
  20. validmind/models/pytorch.py +8 -42
  21. validmind/models/r_model.py +33 -82
  22. validmind/models/sklearn.py +39 -38
  23. validmind/template.py +8 -26
  24. validmind/tests/__init__.py +43 -20
  25. validmind/tests/data_validation/ANOVAOneWayTable.py +1 -1
  26. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  27. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  28. validmind/tests/data_validation/Duplicates.py +1 -1
  29. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  30. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  31. validmind/tests/data_validation/TargetRateBarPlots.py +1 -1
  32. validmind/tests/data_validation/nlp/LanguageDetection.py +59 -0
  33. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +48 -0
  34. validmind/tests/data_validation/nlp/Punctuations.py +11 -12
  35. validmind/tests/data_validation/nlp/Sentiment.py +57 -0
  36. validmind/tests/data_validation/nlp/Toxicity.py +45 -0
  37. validmind/tests/decorator.py +2 -2
  38. validmind/tests/model_validation/BertScore.py +100 -98
  39. validmind/tests/model_validation/BleuScore.py +93 -64
  40. validmind/tests/model_validation/ContextualRecall.py +74 -91
  41. validmind/tests/model_validation/MeteorScore.py +86 -74
  42. validmind/tests/model_validation/RegardScore.py +103 -121
  43. validmind/tests/model_validation/RougeScore.py +118 -0
  44. validmind/tests/model_validation/TokenDisparity.py +84 -121
  45. validmind/tests/model_validation/ToxicityScore.py +109 -123
  46. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +96 -0
  47. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +71 -0
  48. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +92 -0
  49. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +69 -0
  50. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +78 -0
  51. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +35 -23
  52. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +3 -0
  53. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +7 -1
  54. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +3 -0
  55. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +3 -0
  56. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +99 -0
  57. validmind/tests/model_validation/ragas/AnswerCorrectness.py +131 -0
  58. validmind/tests/model_validation/ragas/AnswerRelevance.py +134 -0
  59. validmind/tests/model_validation/ragas/AnswerSimilarity.py +119 -0
  60. validmind/tests/model_validation/ragas/AspectCritique.py +167 -0
  61. validmind/tests/model_validation/ragas/ContextEntityRecall.py +133 -0
  62. validmind/tests/model_validation/ragas/ContextPrecision.py +123 -0
  63. validmind/tests/model_validation/ragas/ContextRecall.py +123 -0
  64. validmind/tests/model_validation/ragas/ContextRelevancy.py +114 -0
  65. validmind/tests/model_validation/ragas/Faithfulness.py +119 -0
  66. validmind/tests/model_validation/ragas/utils.py +66 -0
  67. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -7
  68. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -9
  69. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -10
  70. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +3 -2
  71. validmind/tests/model_validation/sklearn/ROCCurve.py +2 -1
  72. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  73. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -3
  74. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +14 -12
  75. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +3 -4
  76. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +1 -1
  77. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +1 -1
  78. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  79. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  80. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  81. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +1 -1
  82. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  83. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +5 -6
  84. validmind/unit_metrics/__init__.py +26 -49
  85. validmind/unit_metrics/composite.py +5 -1
  86. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +1 -1
  87. validmind/utils.py +56 -6
  88. validmind/vm_models/__init__.py +1 -1
  89. validmind/vm_models/dataset/__init__.py +7 -0
  90. validmind/vm_models/dataset/dataset.py +558 -0
  91. validmind/vm_models/dataset/utils.py +146 -0
  92. validmind/vm_models/model.py +97 -72
  93. validmind/vm_models/test/result_wrapper.py +61 -24
  94. validmind/vm_models/test_context.py +1 -1
  95. validmind/vm_models/test_suite/summary.py +3 -4
  96. {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/METADATA +5 -3
  97. {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/RECORD +100 -75
  98. validmind/models/catboost.py +0 -33
  99. validmind/models/statsmodels.py +0 -50
  100. validmind/models/xgboost.py +0 -30
  101. validmind/tests/model_validation/BertScoreAggregate.py +0 -90
  102. validmind/tests/model_validation/RegardHistogram.py +0 -148
  103. validmind/tests/model_validation/RougeMetrics.py +0 -147
  104. validmind/tests/model_validation/RougeMetricsAggregate.py +0 -133
  105. validmind/tests/model_validation/SelfCheckNLIScore.py +0 -112
  106. validmind/tests/model_validation/ToxicityHistogram.py +0 -136
  107. validmind/vm_models/dataset.py +0 -1303
  108. {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/LICENSE +0 -0
  109. {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/WHEEL +0 -0
  110. {validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/entry_points.txt +0 -0
validmind/models/pytorch.py CHANGED
@@ -4,37 +4,21 @@
 
 from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
-from validmind.vm_models.model import (
-    ModelAttributes,
-    VMModel,
-    has_method_with_arguments,
-)
+from validmind.vm_models.model import VMModel, has_method_with_arguments
 
 logger = get_logger(__name__)
 
 
 class PyTorchModel(VMModel):
-    """
-    An PyTorch model class that wraps a trained model instance and its associated data.
+    """PyTorchModel class wraps a PyTorch model"""
 
-    Attributes:
-        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
-        model (object, optional): The trained model instance. Defaults to None.
-        device_type(str, optional) The device where model is trained
-    """
+    def __post_init__(self):
+        if not self.model:
+            raise ValueError("Model object is a required argument for PyTorchModel")
 
-    def __init__(
-        self,
-        model: object = None,  # Trained model instance
-        input_id: str = None,
-        attributes: ModelAttributes = None,
-    ):
-        super().__init__(
-            model=model,
-            input_id=input_id,
-            attributes=attributes,
-        )
-        self._device_type = next(self.model.parameters()).device
+        self.library = "torch"
+        self.name = self.name or "PyTorch Neural Network"
+        self.device_type = next(self.model.parameters()).device
 
     def predict_proba(self, *args, **kwargs):
         """
@@ -61,21 +45,3 @@ class PyTorchModel(VMModel):
         import torch
 
         return self.model.predict(torch.tensor(args[0]).to(self.device_type))
-
-    def model_library(self):
-        """
-        Returns the model library name
-        """
-        return "torch"
-
-    def model_class(self):
-        """
-        Returns the model class name
-        """
-        return "PyTorchModel"
-
-    def model_name(self):
-        """
-        Returns model architecture
-        """
-        return "PyTorch Neural Networks"
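Usage sketch (illustrative, not part of the published diff): the wrapper's metadata now lives in plain attributes set by __post_init__ instead of the removed model_library()/model_class()/model_name() methods. Assuming the dataclass-style VMModel base still accepts the model and input_id keywords from the old constructor, wrapping a torch module looks roughly like this:

import torch.nn as nn

from validmind.models.pytorch import PyTorchModel

# Any trained torch.nn.Module works; a tiny untrained net keeps the sketch short
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))

vm_model = PyTorchModel(model=net, input_id="torch_model")

print(vm_model.library)      # "torch"
print(vm_model.name)         # "PyTorch Neural Network" unless a name was passed
print(vm_model.device_type)  # device of the first parameter, e.g. device(type='cpu')

In notebooks the wrapper is normally created through vm.init_model(); direct construction is shown here only to make the new attributes visible.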
validmind/models/r_model.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
 import pandas as pd
 
 from validmind.logging import get_logger
-from validmind.vm_models.model import ModelAttributes, VMModel
+from validmind.vm_models.model import VMModel
 
 logger = get_logger(__name__)
 
@@ -16,49 +16,23 @@ def get_full_class_name(obj):
 
 
 class RModel(VMModel):
-    """
-    An R model class that wraps a "fitted" R model instance and its associated data.
-
-    Attributes:
-        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
-        model (object, optional): The trained model instance. Defaults to None.
-        device_type(str, optional) The device where model is trained
-    """
-
-    def __init__(
-        self,
-        r: object = None,  # R instance
-        model: object = None,  # Trained model instance
-        attributes: ModelAttributes = None,
-    ):
-        self.r = r
-        self._is_classification_model = False
-
-        super().__init__(
-            model=model,
-            attributes=attributes,
+    """An R model class that wraps a "fitted" R model instance and its associated data."""
+
+    def __post_init__(self):
+        self.language = self.r["version"].rx2("version.string")[0]
+        self.library = self.class_ = "R"
+
+        name_map = {
+            "xgb.Booster": "XGBoost",
+            "glm": self.__glm_model_class(),
+            "lm": "Linear Regression",
+        }
+        self.name = self.name or name_map.get(
+            self.__model_class(), self.__model_class()
         )
 
         self._is_classification_model = self.__is_classification_model()
 
-    def __get_predict_data_as_df(self, new_data):
-        """
-        Builds the correct data shape and format for the predict method when the
-        caller has passed a Pandas dataframe as input. This function makes sure to
-        adjust the shape of the input dataset to the predict() signature depending
-        if it's a regular R model or an XGBoost model
-        """
-        if self.__model_class() == "xgb.Booster":
-            return new_data.df.drop(new_data.target_column, axis=1)
-
-        return new_data.df
-
-    def __model_class(self):
-        """
-        Returns the model class name
-        """
-        return self.r["class"](self.model)[0]
-
     def __is_classification_model(self):
         """
         Only supported classification models are XGBClassifier and GLM with binomial family (logistic regression).
@@ -78,6 +52,24 @@ class RModel(VMModel):
 
         return False
 
+    def __get_predict_data_as_df(self, new_data):
+        """
+        Builds the correct data shape and format for the predict method when the
+        caller has passed a Pandas dataframe as input. This function makes sure to
+        adjust the shape of the input dataset to the predict() signature depending
+        if it's a regular R model or an XGBoost model
+        """
+        if self.__model_class() == "xgb.Booster":
+            return new_data.df.drop(new_data.target_column, axis=1)
+
+        return new_data.df
+
+    def __model_class(self):
+        """
+        Returns the model class name
+        """
+        return self.r["class"](self.model)[0]
+
     def __glm_model_class(self):
         """
         Returns the model class name for GLM models which include family and link function
@@ -142,9 +134,7 @@
 
         if new_data_class == "numpy.ndarray":
             # We need to reconstruct the DataFrame from the ndarray using the column names
-            new_data = pd.DataFrame(
-                new_data, columns=self.test_ds.get_features_columns()
-            )
+            new_data = pd.DataFrame(new_data, columns=self.test_ds.feature_columns)
         elif new_data_class != "pandas.core.frame.DataFrame":
             raise ValueError(
                 f"new_data must be a DataFrame or ndarray. Got {new_data_class}"
@@ -163,45 +153,6 @@
 
         return predicted_probs
 
-    def model_language(self):
-        """
-        Returns the model library name
-        """
-        return self.r["version"].rx2("version.string")[0]
-
-    def model_library(self):
-        """
-        Returns the model library name
-        """
-        return "R"
-
-    def model_library_version(self, *args, **kwargs):
-        """
-        Model framework library version
-        """
-        return "N/A"
-
-    def model_class(self):
-        """
-        Returns the model class name
-        """
-        return "R"
-
-    def model_name(self):
-        """
-        Returns model name
-        """
-        model_class_name = self.__model_class()
-
-        if model_class_name == "lm":
-            return "Linear Regression"
-        elif model_class_name == "xgb.Booster":
-            return "XGBoost"
-        elif model_class_name == "glm":
-            return self.__glm_model_class()
-
-        return model_class_name
-
     def regression_coefficients(self):
         """
         Returns the regression coefficients summary of the model
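Note (illustrative, not part of the published diff): RModel follows the same pattern, with model_language(), model_library(), model_class() and model_name() collapsed into language, library, class_ and name attributes assigned in __post_init__. The display name is now a plain dictionary lookup that falls back to the raw R class name; the glm entry below is a hypothetical stand-in for whatever __glm_model_class() returns:

# Resolution logic extracted for illustration only
name_map = {
    "xgb.Booster": "XGBoost",
    "glm": "binomial (logit)",  # placeholder for the family/link string from __glm_model_class()
    "lm": "Linear Regression",
}

for r_class in ("lm", "xgb.Booster", "glm", "randomForest"):
    print(f"{r_class!r} -> {name_map.get(r_class, r_class)!r}")
# 'lm' -> 'Linear Regression'
# 'xgb.Booster' -> 'XGBoost'
# 'glm' -> 'binomial (logit)'
# 'randomForest' -> 'randomForest'  (unknown classes fall back to class(model)[0])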
validmind/models/sklearn.py CHANGED
@@ -2,38 +2,23 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+import pandas as pd
+
 from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
-from validmind.vm_models.model import (
-    ModelAttributes,
-    VMModel,
-    has_method_with_arguments,
-)
+from validmind.vm_models.model import VMModel, has_method_with_arguments
 
 logger = get_logger(__name__)
 
 
 class SKlearnModel(VMModel):
-    """
-    An SKlearn model class that wraps a trained model instance and its associated data.
-
-    Attributes:
-        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
-        model (object, optional): The trained model instance. Defaults to None.
-        device_type(str, optional) The device where model is trained
-    """
-
-    def __init__(
-        self,
-        model: object = None,  # Trained model instance
-        input_id: str = None,
-        attributes: ModelAttributes = None,
-    ):
-        super().__init__(
-            model=model,
-            input_id=input_id,
-            attributes=attributes,
-        )
+    def __post_init__(self):
+        if not self.model:
+            raise ValueError("Model object is a required argument for SKlearnModel")
+
+        self.library = self.model.__class__.__module__.split(".")[0]
+        self.class_ = self.model.__class__.__name__
+        self.name = self.name or type(self.model).__name__
 
     def predict_proba(self, *args, **kwargs):
         """
@@ -54,20 +39,36 @@ class SKlearnModel(VMModel):
         """
         return self.model.predict(*args, **kwargs)
 
-    def model_library(self):
-        """
-        Returns the model library name
-        """
-        return self.model.__class__.__module__.split(".")[0]
 
-    def model_class(self):
-        """
-        Returns the model class name
-        """
-        return self.model.__class__.__name__
+class CatBoostModel(SKlearnModel):
+    """Wrapper for CatBoost model"""
+
+    pass
+
 
-    def model_name(self):
+class XGBoostModel(SKlearnModel):
+    """Wrapper for XGBoost model"""
+
+    def __post_init__(self):
+        super().__post_init__()
+        self.library = "xgboost"
+
+
+class StatsModelsModel(SKlearnModel):
+    """Wrapper for StatsModels model"""
+
+    def __post_init__(self):
+        super().__post_init__()
+        self.library = "statsmodels"
+
+    def regression_coefficients(self):
         """
-        Returns model name
+        Returns the regression coefficients summary of the model
         """
-        return type(self.model).__name__
+        raw_summary = self.model.summary()
+
+        table = raw_summary.tables[1].data
+        headers = table.pop(0)
+        headers[0] = "Feature"
+
+        return pd.DataFrame(table, columns=headers)
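Note (illustrative, not part of the published diff): the standalone catboost.py, xgboost.py and statsmodels.py model modules are removed (see the file list above) and replaced by thin SKlearnModel subclasses defined here, with StatsModelsModel gaining a regression_coefficients() helper built on the statsmodels summary tables. A sketch, again assuming the dataclass-style constructor accepts model and input_id keywords:

import numpy as np
import pandas as pd
import statsmodels.api as sm

from validmind.models.sklearn import StatsModelsModel

rng = np.random.default_rng(0)
X = pd.DataFrame({"x1": rng.normal(size=100), "x2": rng.normal(size=100)})
y = 2.0 * X["x1"] - X["x2"] + rng.normal(scale=0.1, size=100)

fit = sm.OLS(y, sm.add_constant(X)).fit()
vm_model = StatsModelsModel(model=fit, input_id="ols_model")

print(vm_model.library)                    # "statsmodels" (overridden in __post_init__)
print(vm_model.regression_coefficients())  # DataFrame with a "Feature" column plus coef, std err, t, P>|t|, ...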
validmind/template.py CHANGED
@@ -2,20 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from pprint import pformat
-
-import mistune
-from IPython.display import display
 from ipywidgets import HTML, Accordion, VBox
 
 from .html_templates.content_blocks import (
     failed_content_block_html,
     non_test_content_block_html,
-    test_content_block_html,
 )
 from .logging import get_logger
 from .tests import LoadTestError, describe_test
-from .utils import is_notebook
+from .utils import display, is_notebook
 from .vm_models import TestSuite
 
 logger = get_logger(__name__)
@@ -26,6 +21,7 @@ CONTENT_TYPE_MAP = {
     "metadata_text": "Metadata Text",
     "dynamic": "Dynamic Content",
     "text": "Text",
+    "risk_assessment": "Risk Assessment",
 }
 
 
@@ -66,29 +62,12 @@ def _create_content_widget(content):
    )
 
    try:
-        test_deets = describe_test(test_id=content["content_id"], raw=True)
+        test_html = describe_test(test_id=content["content_id"], show=False)
    except LoadTestError:
        return HTML(failed_content_block_html.format(test_id=content["content_id"]))
 
    return Accordion(
-        children=[
-            HTML(
-                test_content_block_html.format(
-                    title=test_deets["Name"],
-                    description=mistune.html(test_deets["Description"]),
-                    required_inputs=", ".join(
-                        test_deets["Required Inputs"] or ["None"]
-                    ),
-                    params_table="\n".join(
-                        [
-                            f"<tr><td>{param}</td><td>{pformat(value, indent=4)}</td></tr>"
-                            for param, value in test_deets["Params"].items()
-                        ]
-                    ),
-                    table_display="table" if test_deets["Params"] else "none",
-                )
-            )
-        ],
+        children=[HTML(test_html)],
        titles=[f"{content_type} Block: '{content['content_id']}'"],
    )
 
@@ -117,7 +96,10 @@ def _create_sub_section_widget(sub_sections, section_number):
                contents_widget,
            )
        else:
-            accordion.children = (*accordion.children, HTML("<p>Empty Section</p>"))
+            accordion.children = (
+                *accordion.children,
+                HTML("<p>Empty Section</p>"),
+            )
 
    accordion.set_title(
validmind/tests/__init__.py CHANGED
@@ -6,22 +6,29 @@
 
 import importlib
 import inspect
+import json
 import sys
 from pathlib import Path
 from pprint import pformat
 from typing import Dict
+from uuid import uuid4
 
-import mistune
 import pandas as pd
-from IPython.display import display
-from ipywidgets import HTML
+from ipywidgets import HTML, Accordion
 
 from ..errors import LoadTestError
 from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
 from ..unit_metrics import run_metric
 from ..unit_metrics.composite import load_composite_metric
-from ..utils import format_dataframe, fuzzy_match, test_id_to_name
+from ..utils import (
+    NumpyEncoder,
+    display,
+    format_dataframe,
+    fuzzy_match,
+    md_to_html,
+    test_id_to_name,
+)
 from ..vm_models import TestContext, TestInput
 from .decorator import metric, tags, tasks
 from .test_providers import LocalTestProvider, TestProvider
@@ -75,10 +82,12 @@ def _pretty_list_tests(tests, truncate=True):
 
    table = [
        {
-            "Test Type": __test_classes[test_id].test_type,
+            "ID": test_id,
            "Name": test_id_to_name(test_id),
+            "Test Type": __test_classes[test_id].test_type,
            "Description": _test_description(__test_classes[test_id], truncate),
-            "ID": test_id,
+            "Required Inputs": __test_classes[test_id].required_inputs,
+            "Params": __test_classes[test_id].default_params or {},
        }
        for test_id in tests
    ]
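Note (illustrative, not part of the published diff): the table built by _pretty_list_tests() now surfaces each test's Required Inputs and default Params alongside ID, Name, Test Type and Description. The exact arguments of the public list_tests() wrapper are not shown in this diff, so the call below is only a sketch:

import validmind as vm

# Browsing tests from a notebook now shows what each one needs before you run it:
# ID, Name, Test Type, Description, Required Inputs, Params
vm.tests.list_tests()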
@@ -339,7 +348,7 @@ def load_test(test_id: str, reload=False):
    return test
 
 
-def describe_test(test_id: str = None, raw: bool = False):
+def describe_test(test_id: str = None, raw: bool = False, show: bool = True):
    """Get or show details about the test
 
    This function can be used to see test details including the test name, description,
@@ -365,20 +374,34 @@ def describe_test(test_id: str = None, raw: bool = False):
    if raw:
        return details
 
+    html = test_content_block_html.format(
+        test_id=test_id,
+        uuid=str(uuid4()),
+        title=f'{details["Name"]}',
+        description=md_to_html(details["Description"].strip()),
+        required_inputs=", ".join(details["Required Inputs"] or ["None"]),
+        params_table="\n".join(
+            [
+                f"<tr><td>{param}</td><td>{pformat(value, indent=4)}</td></tr>"
+                for param, value in details["Params"].items()
+            ]
+        ),
+        table_display="table" if details["Params"] else "none",
+        example_inputs=json.dumps(
+            {name: f"my_vm_{name}" for name in details["Required Inputs"]},
+            indent=4,
+        ),
+        example_params=json.dumps(details["Params"] or {}, indent=4, cls=NumpyEncoder),
+        instructions_display="block" if show else "none",
+    )
+
+    if not show:
+        return html
+
    display(
-        HTML(
-            test_content_block_html.format(
-                title=f'{details["Name"]}',
-                description=mistune.html(details["Description"].strip()),
-                required_inputs=", ".join(details["Required Inputs"] or ["None"]),
-                params_table="\n".join(
-                    [
-                        f"<tr><td>{param}</td><td>{pformat(value, indent=4)}</td></tr>"
-                        for param, value in details["Params"].items()
-                    ]
-                ),
-                table_display="table" if details["Params"] else "none",
-            )
+        Accordion(
+            children=[HTML(html)],
+            titles=[f"Test Description: {details['Name']} ('{test_id}')"],
        )
    )
 
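Note (illustrative, not part of the published diff): describe_test() gains a show flag. raw=True still returns the details dict, the default show=True now displays an Accordion widget, and show=False returns the rendered HTML string, which is what template._create_content_widget() consumes above. Sketch:

from validmind.tests import describe_test

test_id = "validmind.data_validation.nlp.LanguageDetection"

describe_test(test_id)                      # displays an Accordion in the notebook
details = describe_test(test_id, raw=True)  # plain dict of details (unchanged behaviour)
html = describe_test(test_id, show=False)   # new: returns the HTML string, displays nothing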
validmind/tests/data_validation/ANOVAOneWayTable.py CHANGED
@@ -74,7 +74,7 @@ class ANOVAOneWayTable(Metric):
 
        # Select all numerical features if none are specified
        if features is None:
-            features = self.inputs.dataset.get_numeric_features_columns()
+            features = self.inputs.dataset.feature_columns_numeric
 
        anova_results = self.anova_numerical_features(features, p_threshold)
 
validmind/tests/data_validation/ChiSquaredFeaturesTable.py CHANGED
@@ -72,7 +72,7 @@ class ChiSquaredFeaturesTable(Metric):
 
        # Ensure cat_features is provided
        if not cat_features:
-            cat_features = self.inputs.dataset.get_categorical_features_columns()
+            cat_features = self.inputs.dataset.feature_columns_categorical
 
        df = self.inputs.dataset.df
 
validmind/tests/data_validation/DescriptiveStatistics.py CHANGED
@@ -116,10 +116,8 @@
 
    def run(self):
        feature_columns = self.inputs.dataset.feature_columns
-        numerical_feature_columns = self.inputs.dataset.get_numeric_features_columns()
-        categorical_feature_columns = (
-            self.inputs.dataset.get_categorical_features_columns()
-        )
+        numerical_feature_columns = self.inputs.dataset.feature_columns_numeric
+        categorical_feature_columns = self.inputs.dataset.feature_columns_categorical
 
        df = self.inputs.dataset.df[feature_columns]
 
validmind/tests/data_validation/Duplicates.py CHANGED
@@ -84,7 +84,7 @@ class Duplicates(ThresholdTest):
        if self.inputs.dataset.text_column:
            columns = self.inputs.dataset.text_column
        else:
-            columns = self.inputs.dataset.get_features_columns()
+            columns = self.inputs.dataset.feature_columns
 
        df = self.inputs.dataset.df[columns]
        # Find duplicate rows
validmind/tests/data_validation/IsolationForestOutliers.py CHANGED
@@ -64,13 +64,13 @@ class IsolationForestOutliers(Metric):
 
    def run(self):
        if self.params["features_columns"] is None:
-            features_list = self.inputs.dataset.get_features_columns()
+            features_list = self.inputs.dataset.feature_columns
        else:
            features_list = self.params["features_columns"]
 
        # Check if all elements from features_list are present in the feature columns
        all_present = all(
-            elem in self.inputs.dataset.get_features_columns() for elem in features_list
+            elem in self.inputs.dataset.feature_columns for elem in features_list
        )
        if not all_present:
            raise ValueError(
validmind/tests/data_validation/LaggedCorrelationHeatmap.py CHANGED
@@ -115,7 +115,7 @@ class LaggedCorrelationHeatmap(Metric):
        else:
            target_col = self.inputs.dataset.target_column
 
-        independent_vars = list(self.inputs.dataset.get_features_columns())
+        independent_vars = list(self.inputs.dataset.feature_columns)
        num_lags = self.params.get("num_lags", 10)
 
        if isinstance(target_col, list) and len(target_col) == 1:
validmind/tests/data_validation/TargetRateBarPlots.py CHANGED
@@ -57,7 +57,7 @@ class TargetRateBarPlots(Metric):
 
        # Use all categorical features if columns is not specified, else use selected columns
        if columns is None:
-            features = self.inputs.dataset.get_categorical_features_columns()
+            features = self.inputs.dataset.feature_columns_categorical
        else:
            features = columns
 
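Note (illustrative, not part of the published diff): the one-line edits across these data_validation tests all track the same dataset-interface rename introduced with the new validmind/vm_models/dataset/ package: the get_features_columns(), get_numeric_features_columns() and get_categorical_features_columns() methods become the feature_columns, feature_columns_numeric and feature_columns_categorical properties. A hypothetical helper showing the 2.2.x spellings, with the 2.1.x equivalents noted in comments:

def split_feature_columns(dataset):
    """Illustrative helper; `dataset` is a validmind VMDataset instance."""
    return {
        # 2.1.x: dataset.get_features_columns()
        "all": dataset.feature_columns,
        # 2.1.x: dataset.get_numeric_features_columns()
        "numeric": dataset.feature_columns_numeric,
        # 2.1.x: dataset.get_categorical_features_columns()
        "categorical": dataset.feature_columns_categorical,
    }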
validmind/tests/data_validation/nlp/LanguageDetection.py ADDED
@@ -0,0 +1,59 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+"""
+Metrics functions for any Pandas-compatible datasets
+"""
+
+
+import plotly.express as px
+from langdetect import LangDetectException, detect
+
+from validmind import tags, tasks
+
+
+@tags("nlp", "text_data", "visualization")
+@tasks("text_classification", "text_summarization")
+def LanguageDetection(dataset):
+    """
+    Detects the language of each text entry in a dataset and visualizes the distribution of languages
+    as a histogram.
+
+    This method checks for a specified text column in the dataset's dataframe, uses a language detection
+    library to determine the language of each text entry, and returns a histogram plot of the language
+    distribution.
+
+    Args:
+        dataset (Dataset): A dataset object which must have a `df` attribute (a pandas DataFrame)
+            and a `text_column` attribute indicating the name of the column containing text. If the
+            `text_column` attribute is not set, a ValueError is raised.
+
+    Returns:
+        plotly.graph_objs._figure.Figure: A Plotly histogram plot showing the distribution of detected
+            languages across the dataset's text entries.
+
+    Raises:
+        ValueError: If the `text_column` is not specified in the dataset object.
+    """
+    # check text column
+    if not dataset.text_column:
+        raise ValueError("Please set text_column name in the Validmind Dataset object")
+
+    # Function to detect language
+    def detect_language(text):
+        try:
+            return detect(text)
+        except LangDetectException:
+            return "Unknown"  # Return 'Unknown' if language detection fails
+
+    # Applying the language detection function to each text entry
+    languages = dataset.df[dataset.text_column].apply(detect_language)
+    fig = px.histogram(
+        languages,
+        x=languages,
+        title="Language Distribution",
+        labels={"x": "Language Codes"},
+    )
+
+    return fig
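Usage sketch (illustrative, not part of the published diff): the new functional-style NLP tests such as LanguageDetection only need a dataset whose text_column is set. The init_dataset and run_test parameter names below reflect the public API as generally documented and are not confirmed by this diff, so treat them as assumptions:

import pandas as pd

import validmind as vm

df = pd.DataFrame({"text": ["Hello world", "Bonjour le monde", "Hola mundo"]})

# text_column must be set, otherwise LanguageDetection raises ValueError
vm_ds = vm.init_dataset(dataset=df, text_column="text")

vm.tests.run_test(
    "validmind.data_validation.nlp.LanguageDetection",
    inputs={"dataset": vm_ds},
)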