PyPI - validmind - Versions diffs - 2.1.0__py3-none-any.whl → 2.2.2__py3-none-any.whl - Mend

validmind 2.1.0__py3-none-any.whl → 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

validmind/vm_models/model.py CHANGED Viewed

@@ -7,9 +7,11 @@ Model class wrapper module
 """
 import importlib
 import inspect
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from dataclasses import dataclass
+from validmind.errors import MissingOrInvalidModelPredictFnError
 SUPPORTED_LIBRARIES = {
     "catboost": "CatBoostModel",
     "xgboost": "XGBoostModel",
@@ -17,6 +19,8 @@ SUPPORTED_LIBRARIES = {
     "statsmodels": "StatsModelsModel",
     "torch": "PyTorchModel",
     "transformers": "HFModel",
+    "function": "FunctionModel",
+    "pipeline": "PipelineModel",
     "custom": "SKlearnModel",
 }
@@ -32,6 +36,23 @@ R_MODEL_METHODS = [
 ]
+class ModelPipeline:
+    """Helper class for chaining models together
+    This shouldn't be used directly, it just gets used when chaining models with the
+    `|` operator since you can't use a list directly - you must use a type that
+    overloads the `|` operator.
+    """
+    def __init__(self, models):
+        self.models = models
+    def __or__(self, other):
+        self.models.append(other)
+        return self
 @dataclass
 class ModelAttributes:
     """
@@ -41,51 +62,67 @@ class ModelAttributes:
     architecture: str = None
     framework: str = None
     framework_version: str = None
+    language: str = None
+    @classmethod
+    def from_dict(cls, data):
+        """
+        Creates a ModelAttributes instance from a dictionary
+        """
+        return cls(
+            architecture=data.get("architecture"),
+            framework=data.get("framework"),
+            framework_version=data.get("framework_version"),
+            language=data.get("language"),
+        )
-class VMModel:
+class VMModel(ABC):
     """
     An base class that wraps a trained model instance and its associated data.
     Attributes:
-        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
         model (object, optional): The trained model instance. Defaults to None.
-        device_type(str, optional) The device where model is trained
+        input_id (str, optional): The input ID for the model. Defaults to None.
+        attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
+        name (str, optional): The name of the model. Defaults to the class name.
     """
-    input_id: str = None
     def __init__(
         self,
         input_id: str = None,
         model: object = None,
         attributes: ModelAttributes = None,
+        name: str = None,
+        **kwargs,
     ):
-        self._model = model
-        self._input_id = input_id
-        self._attributes = attributes
+        self.model = model
+        self.input_id = input_id
-        # The device where model is trained
-        self._device_type = None
+        self.language = "Python"
+        self.library = self.__class__.__name__
+        self.library_version = "N/A"
+        self.class_ = self.__class__.__name__
-    @property
-    def attributes(self):
-        return self._attributes
+        self.name = name or self.__class__.__name__
-    @property
-    def input_id(self):
-        return self._input_id
+        self.attributes = attributes
-    @property
-    def model(self):
-        return self._model
+        # set any additional attributes passed in (likely for subclasses)
+        for key, value in kwargs.items():
+            setattr(self, key, value)
-    @property
-    def device_type(self):
-        """
-        The device where model is trained
-        """
-        return self._device_type
+        self.__post_init__()
+    def __post_init__(self):
+        """Allows child classes to add their own post-init logic"""
+        pass
+    def __or__(self, other):
+        if not isinstance(other, VMModel):
+            raise ValueError("Can only chain VMModel objects")
+        return ModelPipeline([self, other])
     def serialize(self):
         """
@@ -95,13 +132,11 @@ class VMModel:
             "attributes": self.attributes.__dict__,
         }
-    @abstractmethod
     def predict_proba(self, *args, **kwargs):
-        """
-        Predict probability for the model.
-        This is a wrapper around the model's if available
-        """
-        pass
+        """Predict probabilties - must be implemented by subclass if needed"""
+        raise MissingOrInvalidModelPredictFnError(
+            "`predict_proba()` method not implemented for this model"
+        )
     @abstractmethod
     def predict(self, *args, **kwargs):
@@ -110,42 +145,6 @@ class VMModel:
         """
         pass
-    @abstractmethod
-    def model_language(self, *args, **kwargs):
-        """
-        Programming language used to train the model. Assume Python if this
-        method is not implemented
-        """
-        pass
-    @abstractmethod
-    def model_library(self, *args, **kwargs):
-        """
-        Model framework library
-        """
-        pass
-    @abstractmethod
-    def model_library_version(self, *args, **kwargs):
-        """
-        Model framework library version
-        """
-        pass
-    @abstractmethod
-    def model_class(self, *args, **kwargs):
-        """
-        Predict method for the model. This is a wrapper around the model's
-        """
-        pass
-    @abstractmethod
-    def model_name(self, *args, **kwargs):
-        """
-        Model name
-        """
-        pass
 def has_method_with_arguments(cls, method_name, n_args):
     if not hasattr(cls, method_name):
@@ -195,11 +194,17 @@ def model_module(model):
     return module
-def get_model_class(model):
-    model_class_name = SUPPORTED_LIBRARIES.get(model_module(model), None)
+def get_model_class(model, predict_fn=None):
+    # TODO: more consistent way to determine this?!
+    if predict_fn is not None:
+        model_class_name = SUPPORTED_LIBRARIES["function"]
+    elif isinstance(model, ModelPipeline):
+        model_class_name = SUPPORTED_LIBRARIES["pipeline"]
+    else:
+        model_class_name = SUPPORTED_LIBRARIES.get(model_module(model), None)
-    if model_class_name is None:
-        raise Exception("Model library not supported")
+    if not model_class_name:
+        return None
     model_class = getattr(
         importlib.import_module("validmind.models"),
@@ -207,3 +212,23 @@ def get_model_class(model):
     )
     return model_class
+def is_model_metadata(model):
+    """
+    Checks if the model is a dictionary containing metadata about a model.
+    We want to check if the metadata dictionary contains at least the following keys:
+    - architecture
+    - language
+    """
+    if not isinstance(model, dict):
+        return False
+    if "architecture" not in model:
+        return False
+    if "language" not in model:
+        return False
+    return True

validmind/vm_models/test/result_wrapper.py CHANGED Viewed

@@ -12,20 +12,23 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Union
-import ipywidgets as widgets
-import mistune
 import pandas as pd
-from IPython.display import display
+from ipywidgets import HTML, GridBox, Layout, VBox
 from ... import api_client
 from ...ai import DescriptionFuture
-from ...utils import NumpyEncoder, run_async, test_id_to_name
+from ...input_registry import input_registry
+from ...logging import get_logger
+from ...utils import NumpyEncoder, display, md_to_html, run_async, test_id_to_name
+from ..dataset import VMDataset
 from ..figure import Figure
 from .metric_result import MetricResult
 from .output_template import OutputTemplate
 from .result_summary import ResultSummary
 from .threshold_test_result import ThresholdTestResults
+logger = get_logger(__name__)
 async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] = None):
     """
@@ -64,9 +67,9 @@ def plot_figures(figures: List[Figure]) -> None:
     plots = [figure.to_widget() for figure in figures]
     num_columns = 2 if len(figures) > 1 else 1
-    return widgets.GridBox(
+    return GridBox(
         plots,
-        layout=widgets.Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
+        layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
     )
@@ -103,7 +106,7 @@ class ResultWrapper(ABC):
         """
         Convert a markdown string to html
         """
-        return mistune.html(description)
+        return md_to_html(description)
     def _summary_tables_to_widget(self, summary: ResultSummary):
         """
@@ -148,8 +151,8 @@ class ResultWrapper(ABC):
             )  # table.data is an orient=records dump
             if table.metadata and table.metadata.title:
-                tables.append(widgets.HTML(value=f"<h3>{table.metadata.title}</h3>"))
-            tables.append(widgets.HTML(value=summary_table))
+                tables.append(HTML(value=f"<h3>{table.metadata.title}</h3>"))
+            tables.append(HTML(value=summary_table))
         return tables
     def show(self):
@@ -180,9 +183,7 @@ class FailedResultWrapper(ResultWrapper):
         return f'FailedResult(result_id="{self.result_id}")'
     def to_widget(self):
-        return widgets.HTML(
-            value=f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>"
-        )
+        return HTML(f"<h3 style='color: red;'>{self.message}</h3><p>{self.error}</p>")
     async def log_async(self):
         pass
@@ -216,7 +217,7 @@ class MetricResultWrapper(ResultWrapper):
             return ""
         vbox_children = [
-            widgets.HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
+            HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
         ]
         if self.result_metadata:
@@ -226,9 +227,7 @@ class MetricResultWrapper(ResultWrapper):
                 self.result_metadata[0]["text"] = metric_description
             vbox_children.append(
-                widgets.HTML(
-                    value=self._markdown_description_to_html(metric_description)
-                )
+                HTML(value=self._markdown_description_to_html(metric_description))
             )
         if self.metric:
@@ -236,18 +235,18 @@ class MetricResultWrapper(ResultWrapper):
                 rendered_output = OutputTemplate(self.output_template).render(
                     value=self.metric.value
                 )
-                vbox_children.append(widgets.HTML(rendered_output))
+                vbox_children.append(HTML(rendered_output))
             elif self.metric.summary:
                 tables = self._summary_tables_to_widget(self.metric.summary)
                 vbox_children.extend(tables)
         if self.figures:
-            vbox_children.append(widgets.HTML(value="<h3>Plots</h3>"))
+            vbox_children.append(HTML(value="<h3>Plots</h3>"))
             plot_widgets = plot_figures(self.figures)
             vbox_children.append(plot_widgets)
         vbox_children.append(
-            widgets.HTML(
+            HTML(
                 value="""
         <style>
             .metric-result {
@@ -293,12 +292,48 @@ class MetricResultWrapper(ResultWrapper):
             )
         )
-        return widgets.VBox(vbox_children)
+        return VBox(vbox_children)
-    async def log_async(self):
+    def _get_filtered_summary(self):
+        """Check if the metric summary has columns from input datasets"""
+        dataset_columns = set()
+        for input_id in self.inputs:
+            input_obj = input_registry.get(input_id)
+            if isinstance(input_obj, VMDataset):
+                dataset_columns.update(input_obj.columns)
+        for table in [*self.metric.summary.results]:
+            columns = set()
+            if isinstance(table.data, pd.DataFrame):
+                columns.update(table.data.columns)
+            elif isinstance(table.data, list):
+                columns.update(table.data[0].keys())
+            else:
+                raise ValueError("Invalid data type in summary table")
+            if bool(columns.intersection(dataset_columns)):
+                logger.warning(
+                    "Sensitive data in metric summary table. Not logging to API automatically."
+                    " Pass `unsafe=True` to result.log() method to override manually."
+                )
+                logger.warning(
+                    f"The following columns are present in the table: {columns}"
+                    f" and also present in the dataset: {dataset_columns}"
+                )
+                self.metric.summary.results.remove(table)
+        return self.metric.summary
+    async def log_async(self, unsafe=False):
         tasks = []  # collect tasks to run in parallel (async)
         if self.metric:
+            if self.metric.summary and not unsafe:
+                self.metric.summary = self._get_filtered_summary()
             tasks.append(
                 api_client.log_metrics(
                     metrics=[self.metric],
@@ -306,8 +341,10 @@ class MetricResultWrapper(ResultWrapper):
                     output_template=self.output_template,
                 )
             )
         if self.figures:
             tasks.append(api_client.log_figures(self.figures))
         if hasattr(self, "result_metadata") and self.result_metadata:
             description = self.result_metadata[0].get("text", "")
             if isinstance(description, DescriptionFuture):
@@ -383,18 +420,18 @@ class ThresholdTestResultWrapper(ResultWrapper):
             """
         )
-        vbox_children.append(widgets.HTML(value="".join(description_html)))
+        vbox_children.append(HTML(value="".join(description_html)))
         if self.test_results.summary:
             tables = self._summary_tables_to_widget(self.test_results.summary)
             vbox_children.extend(tables)
         if self.figures:
-            vbox_children.append(widgets.HTML(value="<h3>Plots</h3>"))
+            vbox_children.append(HTML(value="<h3>Plots</h3>"))
             plot_widgets = plot_figures(self.figures)
             vbox_children.append(plot_widgets)
-        return widgets.VBox(vbox_children)
+        return VBox(vbox_children)
     async def log_async(self):
         tasks = [api_client.log_test_result(self.test_results, self.inputs)]

validmind/vm_models/test_context.py CHANGED Viewed

@@ -20,7 +20,7 @@ from validmind.input_registry import input_registry
 from ..errors import MissingRequiredTestInputError
 from ..logging import get_logger
-from .dataset import VMDataset
+from .dataset.dataset import VMDataset
 from .model import VMModel
 # More human readable context names for error messages

validmind/vm_models/test_suite/summary.py CHANGED Viewed

@@ -6,10 +6,9 @@ from dataclasses import dataclass
 from typing import List, Optional
 import ipywidgets as widgets
-import mistune
-from IPython.display import display
 from ...logging import get_logger
+from ...utils import display, md_to_html
 from ..test.result_wrapper import FailedResultWrapper
 from .test_suite import TestSuiteSection, TestSuiteTest
@@ -36,7 +35,7 @@ class TestSuiteSectionSummary:
         self._build_summary()
     def _add_description(self):
-        description = f'<div class="result">{mistune.html(self.description)}</div>'
+        description = f'<div class="result">{md_to_html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
     def _add_tests_summary(self):
@@ -101,7 +100,7 @@ class TestSuiteSummary:
         self._widgets.append(widgets.HTML(value=results_link))
     def _add_description(self):
-        description = f'<div class="result">{mistune.html(self.description)}</div>'
+        description = f'<div class="result">{md_to_html(self.description)}</div>'
         self._widgets.append(widgets.HTML(value=description))
     def _add_sections_summary(self):

{validmind-2.1.0.dist-info → validmind-2.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,14 +1,13 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 2.1.0
+Version: 2.2.2
 Summary: ValidMind Developer Framework
 License: Commercial License
 Author: Andres Rodriguez
 Author-email: andres@validmind.ai
-Requires-Python: >=3.8,<3.12
+Requires-Python: >=3.8.1,<3.12
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
@@ -26,6 +25,7 @@ Requires-Dist: evaluate (>=0.4.0,<0.5.0)
 Requires-Dist: ipywidgets (>=8.0.6,<9.0.0)
 Requires-Dist: kaleido (>=0.2.1,<0.3.0,!=0.2.1.post1)
 Requires-Dist: langdetect (>=1.0.9,<2.0.0)
+Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
 Requires-Dist: levenshtein (>=0.21.1,<0.22.0) ; extra == "all" or extra == "llm"
 Requires-Dist: llvmlite (>=0.42.0) ; python_version >= "3.12"
 Requires-Dist: llvmlite ; python_version >= "3.8" and python_full_version <= "3.11.0"
@@ -43,6 +43,7 @@ Requires-Dist: polars (>=0.20.15,<0.21.0)
 Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
 Requires-Dist: pypmml (>=0.9.17,<0.10.0)
 Requires-Dist: python-dotenv (>=0.20.0,<0.21.0)
+Requires-Dist: ragas (>=0.1.7,<0.2.0)
 Requires-Dist: rouge (>=1.0.1,<2.0.0)
 Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
 Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
@@ -55,6 +56,7 @@ Requires-Dist: sentry-sdk (>=1.24.0,<2.0.0)
 Requires-Dist: shap (>=0.42.0,<0.43.0)
 Requires-Dist: statsmodels (>=0.13.5,<0.14.0)
 Requires-Dist: tabulate (>=0.8.9,<0.9.0)
+Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
 Requires-Dist: textstat (>=0.7.3,<0.8.0)
 Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
 Requires-Dist: torchmetrics (>=1.1.1,<2.0.0) ; extra == "all" or extra == "llm"

validmind 2.1.0__py3-none-any.whl → 2.2.2__py3-none-any.whl

validmind 2.1.0__py3-none-any.whl → 2.2.2__py3-none-any.whl