validmind 2.8.12__py3-none-any.whl → 2.8.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -5
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +13 -9
- validmind/ai/utils.py +2 -2
- validmind/api_client.py +75 -32
- validmind/client.py +108 -100
- validmind/client_config.py +3 -3
- validmind/datasets/classification/__init__.py +7 -3
- validmind/datasets/credit_risk/lending_club.py +28 -16
- validmind/datasets/nlp/cnn_dailymail.py +10 -4
- validmind/datasets/regression/__init__.py +22 -5
- validmind/errors.py +17 -7
- validmind/input_registry.py +1 -1
- validmind/logging.py +44 -35
- validmind/models/foundation.py +2 -2
- validmind/models/function.py +10 -3
- validmind/template.py +30 -22
- validmind/test_suites/__init__.py +2 -2
- validmind/tests/_store.py +13 -4
- validmind/tests/comparison.py +65 -33
- validmind/tests/data_validation/ClassImbalance.py +3 -1
- validmind/tests/data_validation/DatasetDescription.py +2 -23
- validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/Skewness.py +7 -6
- validmind/tests/decorator.py +14 -11
- validmind/tests/load.py +38 -24
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +4 -2
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +4 -2
- validmind/tests/model_validation/ragas/ContextPrecision.py +4 -2
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +4 -2
- validmind/tests/model_validation/ragas/ContextRecall.py +4 -2
- validmind/tests/model_validation/ragas/Faithfulness.py +4 -2
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +4 -2
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +4 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +13 -3
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +28 -25
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
- validmind/tests/output.py +66 -11
- validmind/tests/run.py +28 -14
- validmind/tests/test_providers.py +28 -35
- validmind/tests/utils.py +17 -4
- validmind/unit_metrics/__init__.py +1 -1
- validmind/utils.py +295 -31
- validmind/vm_models/dataset/dataset.py +19 -16
- validmind/vm_models/dataset/utils.py +5 -3
- validmind/vm_models/figure.py +6 -6
- validmind/vm_models/input.py +6 -5
- validmind/vm_models/model.py +5 -5
- validmind/vm_models/result/result.py +122 -43
- validmind/vm_models/result/utils.py +5 -5
- validmind/vm_models/test_suite/__init__.py +5 -0
- validmind/vm_models/test_suite/runner.py +5 -5
- validmind/vm_models/test_suite/summary.py +20 -2
- validmind/vm_models/test_suite/test.py +6 -6
- validmind/vm_models/test_suite/test_suite.py +10 -10
- {validmind-2.8.12.dist-info → validmind-2.8.20.dist-info}/METADATA +3 -4
- {validmind-2.8.12.dist-info → validmind-2.8.20.dist-info}/RECORD +61 -60
- {validmind-2.8.12.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
- {validmind-2.8.12.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
- {validmind-2.8.12.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0
validmind/utils.py
CHANGED
@@ -12,7 +12,7 @@ import sys
 import warnings
 from datetime import date, datetime, time
 from platform import python_version
-from typing import Any, Dict, List
+from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar

 import matplotlib.pylab as pylab
 import mistune
@@ -20,6 +20,7 @@ import nest_asyncio
 import numpy as np
 import pandas as pd
 import seaborn as sns
+from bs4 import BeautifulSoup
 from IPython.core import getipython
 from IPython.display import HTML
 from IPython.display import display as ipy_display
@@ -59,23 +60,25 @@ pylab.rcParams.update(params)

 logger = get_logger(__name__)

+T = TypeVar("T")
+

 def parse_version(version: str) -> tuple[int, ...]:
     """
-    Parse a semver version string into a tuple of major, minor, patch integers
+    Parse a semver version string into a tuple of major, minor, patch integers.

     Args:
-        version (str): The semantic version string to parse
+        version (str): The semantic version string to parse.

     Returns:
-        tuple[int, ...]: A tuple of major, minor, patch integers
+        tuple[int, ...]: A tuple of major, minor, patch integers.
     """
     return tuple(int(x) for x in version.split(".")[:3])


 def is_notebook() -> bool:
     """
-    Checks if the code is running in a Jupyter notebook or IPython shell
+    Checks if the code is running in a Jupyter notebook or IPython shell.

     https://stackoverflow.com/questions/15411967/how-can-i-check-if-code-is-executed-in-the-ipython-notebook
     """
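Illustrative usage (not part of the diff): parse_version keeps only the first three dot-separated components, so suffixes past the patch number are dropped.

    from validmind.utils import parse_version

    assert parse_version("2.8.20") == (2, 8, 20)
    assert parse_version("1.2.3.dev0") == (1, 2, 3)  # components past the third are ignored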
@@ -209,9 +212,7 @@ class HumanReadableEncoder(NumpyEncoder):


 def get_full_typename(o: Any) -> Any:
-    """We determine types based on type names so we don't have to import
-    (and therefore depend on) PyTorch, TensorFlow, etc.
-    """
+    """We determine types based on type names so we don't have to import."""
     instance_name = o.__class__.__module__ + "." + o.__class__.__name__
     if instance_name in ["builtins.module", "__builtin__.module"]:
         return o.__name__
@@ -313,9 +314,9 @@ def format_key_values(key_values: Dict[str, Any]) -> Dict[str, Any]:


 def summarize_data_quality_results(results):
     """
-    TODO: generalize this to work with metrics and test results
+    TODO: generalize this to work with metrics and test results.

-    Summarize the results of the data quality test suite
+    Summarize the results of the data quality test suite.
     """
     test_results = []
     for result in results:
@@ -354,25 +355,31 @@ def format_number(number):


 def format_dataframe(df: pd.DataFrame) -> pd.DataFrame:
-    """Format a pandas DataFrame for display purposes"""
+    """Format a pandas DataFrame for display purposes."""
     df = df.style.set_properties(**{"text-align": "left"}).hide(axis="index")
     return df.set_table_styles([dict(selector="th", props=[("text-align", "left")])])


-def run_async(func, *args, name=None, **kwargs):
-    """Helper function to run functions asynchronously.
+def run_async(
+    func: Callable[..., Awaitable[T]],
+    *args: Any,
+    name: Optional[str] = None,
+    **kwargs: Any,
+) -> T:
+    """Helper function to run functions asynchronously.

     This takes care of the complexity of running the logging functions asynchronously. It will
-    detect the type of environment we are running in (
+    detect the type of environment we are running in (IPython notebook or not) and run the
     function accordingly.

     Args:
-        func
-        *args: The arguments to pass to the function
-
+        func: The function to run asynchronously.
+        *args: The arguments to pass to the function.
+        name: Optional name for the task.
+        **kwargs: The keyword arguments to pass to the function.

     Returns:
-        The result of the function
+        The result of the function.
     """
     try:
         if asyncio.get_event_loop().is_running() and is_notebook():
@@ -390,8 +397,19 @@ def run_async(func, *args, name=None, **kwargs):
     return asyncio.get_event_loop().run_until_complete(func(*args, **kwargs))


-def run_async_check(func, *args, **kwargs):
-    """Helper function to run functions asynchronously if the task doesn't already exist"""
+def run_async_check(
+    func: Callable[..., Awaitable[T]], *args: Any, **kwargs: Any
+) -> Optional[asyncio.Task[T]]:
+    """Helper function to run functions asynchronously if the task doesn't already exist.
+
+    Args:
+        func: The function to run asynchronously.
+        *args: The arguments to pass to the function.
+        **kwargs: The keyword arguments to pass to the function.
+
+    Returns:
+        Optional[asyncio.Task[T]]: The task if created or found, None otherwise.
+    """
     if __loop:
         return  # we don't need this if we are using our own loop
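Illustrative usage of the newly typed helper (the coroutine below is hypothetical): run_async detects whether a notebook event loop is already running and either schedules the coroutine on it or blocks via run_until_complete.

    import asyncio
    from validmind.utils import run_async

    async def send_log(message: str) -> str:  # hypothetical stand-in for an async logging call
        await asyncio.sleep(0)
        return f"sent: {message}"

    # Blocks in a plain script; inside a notebook it is scheduled on the live loop.
    print(run_async(send_log, "hello", name="log-task"))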
@@ -408,16 +426,16 @@ def run_async_check(func, *args, **kwargs):
         pass


-def fuzzy_match(string: str, search_string: str, threshold=0.7):
-    """Check if a string matches another string using fuzzy matching
+def fuzzy_match(string: str, search_string: str, threshold: float = 0.7) -> bool:
+    """Check if a string matches another string using fuzzy matching.

     Args:
-        string (str): The string to check
-        search_string (str): The string to search for
-        threshold (float): The similarity threshold to use (Default: 0.7)
+        string (str): The string to check.
+        search_string (str): The string to search for.
+        threshold (float): The similarity threshold to use (Default: 0.7).

     Returns:
-        True if the string matches the search string, False otherwise
+        bool: True if the string matches the search string, False otherwise.
     """
     score = difflib.SequenceMatcher(None, string, search_string).ratio()
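The scoring is plain difflib, so the behavior is easy to preview outside the library (illustrative):

    import difflib

    score = difflib.SequenceMatcher(None, "ClassImbalance", "class imbalance").ratio()
    print(round(score, 2))  # roughly 0.83, above the default 0.7 threshold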
@@ -448,7 +466,7 @@ def test_id_to_name(test_id: str) -> str:


 def get_model_info(model):
-    """Attempts to extract all model info from a model object instance"""
+    """Attempts to extract all model info from a model object instance."""
     architecture = model.name
     framework = model.library
     framework_version = model.library_version
@@ -472,7 +490,7 @@ def get_model_info(model):


 def get_dataset_info(dataset):
-    """Attempts to extract all dataset info from a dataset object instance"""
+    """Attempts to extract all dataset info from a dataset object instance."""
     num_rows, num_cols = dataset.df.shape
     schema = dataset.df.dtypes.apply(lambda x: x.name).to_dict()
     description = (
@@ -491,7 +509,7 @@ def preview_test_config(config):
     """Preview test configuration in a collapsible HTML section.

     Args:
-        config (dict): Test configuration dictionary
+        config (dict): Test configuration dictionary.
     """

     try:
@@ -515,7 +533,7 @@ def preview_test_config(config):


 def display(widget_or_html, syntax_highlighting=True, mathjax=True):
-    """Display widgets with extra goodies (syntax highlighting, MathJax, etc.)"""
+    """Display widgets with extra goodies (syntax highlighting, MathJax, etc.)."""
     if isinstance(widget_or_html, str):
         ipy_display(HTML(widget_or_html))
     # if html we can auto-detect if we actually need syntax highlighting or MathJax
@@ -532,7 +550,7 @@ def display(widget_or_html, syntax_highlighting=True, mathjax=True):


 def md_to_html(md: str, mathml=False) -> str:
-    """Converts Markdown to HTML using mistune with plugins"""
+    """Converts Markdown to HTML using mistune with plugins."""
     # use mistune with math plugin to convert to html
     html = mistune.create_markdown(
         plugins=["math", "table", "strikethrough", "footnotes"]
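The mistune call above builds a reusable renderer; the same pattern standalone (illustrative):

    import mistune

    render = mistune.create_markdown(plugins=["math", "table", "strikethrough", "footnotes"])
    print(render("**bold**, ~~struck~~, and $x^2$"))  # emits HTML with math markup preserved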
@@ -559,6 +577,63 @@ def md_to_html(md: str, mathml=False) -> str:
     return html


+def is_html(text: str) -> bool:
+    """Check if a string is HTML.
+
+    Uses more robust heuristics to determine if a string contains HTML content.
+
+    Args:
+        text (str): The string to check
+
+    Returns:
+        bool: True if the string likely contains HTML, False otherwise
+    """
+    # Strip whitespace first
+    text = text.strip()
+
+    # Basic check: Must at least start with < and end with >
+    if not (text.startswith("<") and text.endswith(">")):
+        return False
+
+    # Look for common HTML tags
+    common_html_patterns = [
+        r"<html.*?>",  # HTML tag
+        r"<body.*?>",  # Body tag
+        r"<div.*?>",  # Div tag
+        r"<p>.*?</p>",  # Paragraph with content
+        r"<h[1-6]>.*?</h[1-6]>",  # Headers
+        r"<script.*?>",  # Script tags
+        r"<style.*?>",  # Style tags
+        r"<a href=.*?>",  # Links
+        r"<img.*?>",  # Images
+        r"<table.*?>",  # Tables
+        r"<!DOCTYPE html>",  # DOCTYPE declaration
+    ]
+
+    for pattern in common_html_patterns:
+        if re.search(pattern, text, re.IGNORECASE | re.DOTALL):
+            return True
+
+    # If we have at least 2 matching tags, it's likely HTML
+    # This helps detect custom elements or patterns not in our list
+    tags = re.findall(r"</?[a-zA-Z][a-zA-Z0-9]*.*?>", text)
+    if len(tags) >= 2:
+        return True
+
+    # Try parsing with BeautifulSoup as a last resort
+    try:
+        soup = BeautifulSoup(text, "html.parser")
+        # If we find any tags that weren't in the original text, BeautifulSoup
+        # likely tried to fix broken HTML, meaning it's not valid HTML
+        return len(soup.find_all()) > 0
+
+    except Exception as e:
+        logger.error(f"Error checking if text is HTML: {e}")
+        return False
+
+    return False
+
+
 def inspect_obj(obj):
     # Filtering only attributes
     print(len("Attributes:") * "-")
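Illustrative behavior of the new helper, following the heuristics above:

    from validmind.utils import is_html

    print(is_html("<div>hello</div>"))        # True: matches a common-tag pattern
    print(is_html("a < b and c > d"))         # False: must start with "<" and end with ">"
    print(is_html("<thing><other></thing>"))  # True: two or more tag-like tokens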
@@ -601,3 +676,192 @@ def serialize(obj):
     elif isinstance(obj, (pd.DataFrame, pd.Series)):
         return ""  # Simple empty string for non-serializable objects
     return obj
+
+
+def is_text_column(series, threshold=0.05) -> bool:
+    """
+    Determines if a series is likely to contain text data using heuristics.
+
+    Args:
+        series (pd.Series): The pandas Series to analyze
+        threshold (float): The minimum threshold to classify a pattern match as significant
+
+    Returns:
+        bool: True if the series likely contains text data, False otherwise
+    """
+    # Filter to non-null string values and sample if needed
+    string_series = series.dropna().astype(str)
+    if len(string_series) == 0:
+        return False
+    if len(string_series) > 1000:
+        string_series = string_series.sample(1000, random_state=42)
+
+    # Calculate basic metrics
+    total_values = len(string_series)
+    unique_ratio = len(string_series.unique()) / total_values if total_values > 0 else 0
+    avg_length = string_series.str.len().mean()
+    avg_words = string_series.str.split(r"\s+").str.len().mean()
+
+    # Check for special text patterns
+    patterns = {
+        "url": r"https?://\S+|www\.\S+",
+        "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b",
+        "filepath": r'(?:[a-zA-Z]:|[\\/])(?:[\\/][^\\/:*?"<>|]+)+',
+    }
+
+    # Check if any special patterns exceed threshold
+    for pattern in patterns.values():
+        if string_series.str.contains(pattern, regex=True, na=False).mean() > threshold:
+            return True
+
+    # Calculate proportion of alphabetic characters
+    total_chars = string_series.str.len().sum()
+    if total_chars > 0:
+        alpha_ratio = string_series.str.count(r"[a-zA-Z]").sum() / total_chars
+    else:
+        alpha_ratio = 0
+
+    # Check for free-form text indicators
+    text_indicators = [
+        unique_ratio > 0.8 and avg_length > 20,  # High uniqueness and long strings
+        unique_ratio > 0.4
+        and avg_length > 15
+        and string_series.str.contains(r"[.,;:!?]", regex=True, na=False).mean()
+        > 0.3,  # Moderate uniqueness with punctuation
+        string_series.str.contains(
+            r"\b\w+\b\s+\b\w+\b\s+\b\w+\b\s+\b\w+\b", regex=True, na=False
+        ).mean()
+        > 0.3,  # Contains long phrases
+        avg_words > 5 and alpha_ratio > 0.6,  # Many words with mostly letters
+        unique_ratio > 0.95 and avg_length > 10,  # Very high uniqueness
+    ]
+
+    return any(text_indicators)
+
+
+def _get_numeric_type_detail(column, dtype, series):
+    """Helper function to determine numeric type details."""
+    if pd.api.types.is_integer_dtype(dtype):
+        return {"type": "Numeric", "subtype": "Integer"}
+    elif pd.api.types.is_float_dtype(dtype):
+        return {"type": "Numeric", "subtype": "Float"}
+    else:
+        return {"type": "Numeric", "subtype": "Other"}
+
+
+def _get_text_type_detail(series):
+    """Helper function to determine text/categorical type details."""
+    string_series = series.dropna().astype(str)
+
+    if len(string_series) == 0:
+        return {"type": "Categorical"}
+
+    # Check for common patterns
+    url_pattern = r"https?://\S+|www\.\S+"
+    email_pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b"
+    filepath_pattern = r'(?:[a-zA-Z]:|[\\/])(?:[\\/][^\\/:*?"<>|]+)+'
+
+    url_ratio = string_series.str.contains(url_pattern, regex=True, na=False).mean()
+    email_ratio = string_series.str.contains(email_pattern, regex=True, na=False).mean()
+    filepath_ratio = string_series.str.contains(
+        filepath_pattern, regex=True, na=False
+    ).mean()
+
+    # Check if general text using enhanced function
+    if url_ratio > 0.7:
+        return {"type": "Text", "subtype": "URL"}
+    elif email_ratio > 0.7:
+        return {"type": "Text", "subtype": "Email"}
+    elif filepath_ratio > 0.7:
+        return {"type": "Text", "subtype": "Path"}
+    elif is_text_column(series):
+        return {"type": "Text", "subtype": "FreeText"}
+
+    # Must be categorical
+    n_unique = series.nunique()
+    if n_unique == 2:
+        return {"type": "Categorical", "subtype": "Binary"}
+    else:
+        return {"type": "Categorical", "subtype": "Nominal"}
+
+
+def get_column_type_detail(df, column) -> dict:
+    """
+    Get detailed column type information beyond basic type detection.
+    Similar to ydata-profiling's type system.
+
+    Args:
+        df (pd.DataFrame): DataFrame containing the column
+        column (str): Column name to analyze
+
+    Returns:
+        dict: Detailed type information including primary type and subtype
+    """
+    series = df[column]
+    dtype = series.dtype
+
+    # Initialize result with id and basic type
+    result = {"id": column, "type": "Unknown"}
+
+    # Determine type details based on dtype
+    type_detail = None
+
+    if pd.api.types.is_numeric_dtype(dtype):
+        type_detail = _get_numeric_type_detail(column, dtype, series)
+    elif pd.api.types.is_bool_dtype(dtype):
+        type_detail = {"type": "Boolean"}
+    elif pd.api.types.is_datetime64_any_dtype(dtype):
+        type_detail = {"type": "Datetime"}
+    elif pd.api.types.is_categorical_dtype(dtype) or pd.api.types.is_object_dtype(
+        dtype
+    ):
+        type_detail = _get_text_type_detail(series)
+
+    # Update result with type details
+    if type_detail:
+        result.update(type_detail)
+
+    return result
+
+
+def infer_datatypes(df, detailed=False) -> list:
+    """
+    Infer data types for columns in a DataFrame.
+
+    Args:
+        df (pd.DataFrame): DataFrame to analyze
+        detailed (bool): Whether to return detailed type information including subtypes
+
+    Returns:
+        list: Column type mappings
+    """
+    if detailed:
+        return [get_column_type_detail(df, column) for column in df.columns]
+
+    column_type_mappings = {}
+    # Use pandas to infer data types
+    for column in df.columns:
+        # Check if all values are None
+        if df[column].isna().all():
+            column_type_mappings[column] = {"id": column, "type": "Null"}
+            continue
+
+        dtype = df[column].dtype
+        if pd.api.types.is_numeric_dtype(dtype):
+            column_type_mappings[column] = {"id": column, "type": "Numeric"}
+        elif pd.api.types.is_bool_dtype(dtype):
+            column_type_mappings[column] = {"id": column, "type": "Boolean"}
+        elif pd.api.types.is_datetime64_any_dtype(dtype):
+            column_type_mappings[column] = {"id": column, "type": "Datetime"}
+        elif pd.api.types.is_categorical_dtype(dtype) or pd.api.types.is_object_dtype(
+            dtype
+        ):
+            # Check if this is more likely to be text than categorical
+            if is_text_column(df[column]):
+                column_type_mappings[column] = {"id": column, "type": "Text"}
+            else:
+                column_type_mappings[column] = {"id": column, "type": "Categorical"}
+        else:
+            column_type_mappings[column] = {"id": column, "type": "Unsupported"}
+
+    return list(column_type_mappings.values())
validmind/vm_models/dataset/dataset.py
CHANGED
@@ -8,6 +8,7 @@ Dataset class wrapper

 import warnings
 from copy import deepcopy
+from typing import Any, Dict, List, Optional

 import numpy as np
 import pandas as pd
@@ -24,9 +25,9 @@ logger = get_logger(__name__)


 class VMDataset(VMInput):
-    """Base class for VM datasets
+    """Base class for VM datasets.

-    Child classes should be used to support new dataset types (tensor, polars etc)
+    Child classes should be used to support new dataset types (tensor, polars etc.)
     by converting the user's dataset into a numpy array collecting metadata like
     column names and then call this (parent) class `__init__` method.
@@ -200,7 +201,7 @@ class VMDataset(VMInput):
                 "Cannot use precomputed probabilities without precomputed predictions"
             )

-    def with_options(self, **kwargs) -> "VMDataset":
+    def with_options(self, **kwargs: Dict[str, Any]) -> "VMDataset":
         """Support options provided when passing an input to run_test or run_test_suite

         Example:
@@ -253,23 +254,25 @@ class VMDataset(VMInput):
     def assign_predictions(
         self,
         model: VMModel,
-        prediction_column: str = None,
-        prediction_values: list = None,
-        probability_column: str = None,
-        probability_values: list = None,
-        prediction_probabilities: list = None,
-        **kwargs,
-    ):
+        prediction_column: Optional[str] = None,
+        prediction_values: Optional[List[Any]] = None,
+        probability_column: Optional[str] = None,
+        probability_values: Optional[List[float]] = None,
+        prediction_probabilities: Optional[
+            List[float]
+        ] = None,  # DEPRECATED: use probability_values
+        **kwargs: Dict[str, Any],
+    ) -> None:
         """Assign predictions and probabilities to the dataset.

         Args:
             model (VMModel): The model used to generate the predictions.
-            prediction_column (str, optional): The name of the column containing the predictions.
-            prediction_values (list, optional): The values of the predictions.
-            probability_column (str, optional): The name of the column containing the probabilities.
-            probability_values (list, optional): The values of the probabilities.
-            prediction_probabilities (list, optional): DEPRECATED: The values of the probabilities.
-            kwargs: Additional keyword arguments that will get passed through to the model's `predict` method.
+            prediction_column (Optional[str]): The name of the column containing the predictions.
+            prediction_values (Optional[List[Any]]): The values of the predictions.
+            probability_column (Optional[str]): The name of the column containing the probabilities.
+            probability_values (Optional[List[float]]): The values of the probabilities.
+            prediction_probabilities (Optional[List[float]]): DEPRECATED: The values of the probabilities.
+            **kwargs: Additional keyword arguments that will get passed through to the model's `predict` method.
         """
         if prediction_probabilities is not None:
             warnings.warn(
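Illustrative usage (vm_model and vm_dataset stand in for objects returned by validmind's init_model and init_dataset): precomputed predictions and probabilities can now be passed via the typed optional arguments.

    vm_dataset.assign_predictions(
        model=vm_model,
        prediction_values=[0, 1, 1],
        probability_values=[0.12, 0.87, 0.66],
    )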
validmind/vm_models/dataset/utils.py
CHANGED
@@ -45,11 +45,11 @@ class ExtraColumns:
         )

     def __contains__(self, key):
-        """Allow checking if a key is `in` the extra columns"""
+        """Allow checking if a key is `in` the extra columns."""
         return key in self.flatten()

     def flatten(self) -> List[str]:
-        """Get a list of all column names"""
+        """Get a list of all column names."""
         return [
             self.group_by_column,
             *self.extras,
@@ -78,13 +78,14 @@ class ExtraColumns:


 def as_df(series_or_frame: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame:
+    """Convert a pandas Series or DataFrame to a DataFrame."""
     if isinstance(series_or_frame, pd.Series):
         return series_or_frame.to_frame()
     return series_or_frame


 def _is_probabilties(output):
-    """Check if the output
+    """Check if the output is a probability array."""
     if not isinstance(output, np.ndarray) or output.ndim > 1:
         return False
@@ -98,6 +99,7 @@ def _is_probabilties(output):


 def compute_predictions(model, X, **kwargs) -> tuple:
+    """Compute predictions and probabilities for a model."""
     probability_values = None

     try:
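The _is_probabilties body is cut off in the diff above; a plausible sketch of such a heuristic (illustrative only, not the package's exact code):

    import numpy as np

    def looks_like_probabilities(output) -> bool:
        # Reject non-arrays and multi-dimensional outputs, as in the diff above
        if not isinstance(output, np.ndarray) or output.ndim > 1:
            return False
        # Illustrative continuation: float values confined to [0, 1] that are
        # not simply the 0/1 class labels themselves
        if not np.issubdtype(output.dtype, np.floating):
            return False
        in_unit_interval = bool((output >= 0).all() and (output <= 1).all())
        not_just_labels = bool(np.any((output > 0) & (output < 1)))
        return in_unit_interval and not_just_labels

    print(looks_like_probabilities(np.array([0.1, 0.8, 0.5])))  # True
    print(looks_like_probabilities(np.array([0.0, 1.0, 1.0])))  # False: looks like labels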
validmind/vm_models/figure.py
CHANGED
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

 """
-Figure objects track the figure schema supported by the ValidMind API
+Figure objects track the figure schema supported by the ValidMind API.
 """

 import base64
@@ -38,7 +38,7 @@ def create_figure(
     key: str,
     ref_id: str,
 ) -> "Figure":
-    """Create a VM Figure object from a raw figure object"""
+    """Create a VM Figure object from a raw figure object."""
     if is_matplotlib_figure(figure) or is_plotly_figure(figure) or is_png_image(figure):
         return Figure(key=key, figure=figure, ref_id=ref_id)
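Illustrative usage (the first positional parameter name is inferred from the Figure(...) call above; key and ref_id values are hypothetical):

    import matplotlib.pyplot as plt
    from validmind.vm_models.figure import create_figure

    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [4, 5, 6])

    vm_figure = create_figure(fig, key="loss_curve", ref_id="result-123")
    payload = vm_figure.serialize()  # dict matching the ValidMind API schema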
@@ -48,7 +48,7 @@ def create_figure(
 @dataclass
 class Figure:
     """
-    Figure objects track the schema supported by the ValidMind API
+    Figure objects track the schema supported by the ValidMind API.
     """

     key: str
@@ -115,7 +115,7 @@ class Figure:

     def serialize(self):
         """
-        Serializes the Figure to a dictionary so it can be sent to the API
+        Serializes the Figure to a dictionary so it can be sent to the API.
         """
         return {
             "type": self._type,
@@ -125,7 +125,7 @@ class Figure:

     def _get_b64_url(self):
         """
-        Returns a base64 encoded URL for the figure
+        Returns a base64 encoded URL for the figure.
         """
         if is_matplotlib_figure(self.figure):
             buffer = BytesIO()
@@ -152,7 +152,7 @@ class Figure:
         )

     def serialize_files(self):
-        """Creates a `requests`-compatible files object to be sent to the API"""
+        """Creates a `requests`-compatible files object to be sent to the API."""
         if is_matplotlib_figure(self.figure):
             buffer = BytesIO()
             self.figure.savefig(buffer, bbox_inches="tight")
validmind/vm_models/input.py
CHANGED
@@ -5,27 +5,28 @@
 """Base class for ValidMind Input types"""

 from abc import ABC
+from typing import Any, Dict


 class VMInput(ABC):
     """
-    Base class for ValidMind Input types
+    Base class for ValidMind Input types.
     """

-    def with_options(self, **kwargs) -> "VMInput":
+    def with_options(self, **kwargs: Dict[str, Any]) -> "VMInput":
         """
         Allows for setting options on the input object that are passed by the user
-        when using the input to run a test or set of tests
+        when using the input to run a test or set of tests.

         To allow options, just override this method in the subclass (see VMDataset)
         and ensure that it returns a new instance of the input with the specified options
         set.

         Args:
-            **kwargs: Arbitrary keyword arguments that will be passed to the input object
+            **kwargs: Arbitrary keyword arguments that will be passed to the input object.

         Returns:
-            VMInput: A new instance of the input with the specified options set
+            VMInput: A new instance of the input with the specified options set.
         """
         if kwargs:
             raise NotImplementedError("This type of input does not support options")
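A minimal illustrative subclass following the override contract described above (MyInput is hypothetical):

    from validmind.vm_models.input import VMInput

    class MyInput(VMInput):
        def __init__(self, columns=None):
            self.columns = columns or []

        def with_options(self, **kwargs) -> "MyInput":
            # Return a NEW instance with the requested options applied,
            # as the base-class docstring requires
            return MyInput(columns=kwargs.get("columns", self.columns))

    narrowed = MyInput(columns=["a", "b"]).with_options(columns=["a"])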
validmind/vm_models/model.py
CHANGED
@@ -40,7 +40,7 @@ R_MODEL_METHODS = [


 class ModelTask(Enum):
-    """Model task enums"""
+    """Model task enums."""

     # TODO: add more tasks
     CLASSIFICATION = "classification"
@@ -67,7 +67,7 @@ class ModelPipeline:
 @dataclass
 class ModelAttributes:
     """
-    Model attributes definition
+    Model attributes definition.
     """

     architecture: str = None
@@ -79,7 +79,7 @@ class ModelAttributes:
     @classmethod
     def from_dict(cls, data):
         """
-        Creates a ModelAttributes instance from a dictionary
+        Creates a ModelAttributes instance from a dictionary.
         """
         return cls(
             architecture=data.get("architecture"),
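Illustrative round trip (only the architecture key is visible in the hunk; unspecified fields simply keep their dataclass defaults):

    from validmind.vm_models.model import ModelAttributes

    attrs = ModelAttributes.from_dict({"architecture": "XGBoost"})
    print(attrs.architecture)  # "XGBoost"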
@@ -235,8 +235,8 @@ def is_model_metadata(model):
     Checks if the model is a dictionary containing metadata about a model.
     We want to check if the metadata dictionary contains at least the following keys:

-    -
-    -
+    - Architecture
+    - Language
     """
     if not isinstance(model, dict):
         return False
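Illustrative checks (the isinstance gate is shown above; the exact key casing is enforced by the rest of the body, which the diff does not show):

    from validmind.vm_models.model import is_model_metadata

    print(is_model_metadata("not a dict"))  # False: fails the isinstance check above
    # Expected True when the dict carries at least the Architecture and Language keys:
    print(is_model_metadata({"architecture": "GPT-4", "language": "Python"}))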