validmind 2.8.10__py3-none-any.whl → 2.8.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -5
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +17 -11
- validmind/ai/utils.py +2 -2
- validmind/api_client.py +75 -32
- validmind/client.py +108 -100
- validmind/client_config.py +3 -3
- validmind/datasets/classification/__init__.py +7 -3
- validmind/datasets/credit_risk/lending_club.py +28 -16
- validmind/datasets/nlp/cnn_dailymail.py +10 -4
- validmind/datasets/regression/__init__.py +22 -5
- validmind/errors.py +17 -7
- validmind/input_registry.py +1 -1
- validmind/logging.py +44 -35
- validmind/models/foundation.py +2 -2
- validmind/models/function.py +10 -3
- validmind/template.py +30 -22
- validmind/test_suites/__init__.py +2 -2
- validmind/tests/_store.py +13 -4
- validmind/tests/comparison.py +65 -33
- validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
- validmind/tests/data_validation/AutoMA.py +1 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
- validmind/tests/data_validation/BoxPierce.py +3 -1
- validmind/tests/data_validation/ClassImbalance.py +4 -2
- validmind/tests/data_validation/DatasetDescription.py +3 -24
- validmind/tests/data_validation/DescriptiveStatistics.py +1 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
- validmind/tests/data_validation/HighCardinality.py +5 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
- validmind/tests/data_validation/IQROutliersTable.py +5 -2
- validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
- validmind/tests/data_validation/JarqueBera.py +2 -2
- validmind/tests/data_validation/LJungBox.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/MissingValues.py +14 -10
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +2 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
- validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
- validmind/tests/data_validation/RollingStatsPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
- validmind/tests/data_validation/SeasonalDecompose.py +1 -1
- validmind/tests/data_validation/ShapiroWilk.py +2 -2
- validmind/tests/data_validation/Skewness.py +7 -6
- validmind/tests/data_validation/SpreadPlot.py +1 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/TextDescription.py +1 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/decorator.py +14 -11
- validmind/tests/load.py +38 -24
- validmind/tests/model_validation/BertScore.py +7 -1
- validmind/tests/model_validation/BleuScore.py +7 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
- validmind/tests/model_validation/ContextualRecall.py +9 -1
- validmind/tests/model_validation/FeaturesAUC.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +7 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
- validmind/tests/model_validation/RegardScore.py +6 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
- validmind/tests/model_validation/RougeScore.py +3 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
- validmind/tests/model_validation/TokenDisparity.py +5 -1
- validmind/tests/model_validation/ToxicityScore.py +2 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -3
- validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -3
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -3
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -3
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -3
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -3
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +5 -3
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -3
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +28 -5
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
- validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
- validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -3
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +34 -26
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +15 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
- validmind/tests/output.py +66 -11
- validmind/tests/prompt_validation/Clarity.py +1 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
- validmind/tests/prompt_validation/Robustness.py +6 -1
- validmind/tests/prompt_validation/Specificity.py +1 -1
- validmind/tests/run.py +28 -14
- validmind/tests/test_providers.py +28 -35
- validmind/tests/utils.py +17 -4
- validmind/unit_metrics/__init__.py +1 -1
- validmind/utils.py +295 -31
- validmind/vm_models/dataset/dataset.py +19 -16
- validmind/vm_models/dataset/utils.py +5 -3
- validmind/vm_models/figure.py +6 -6
- validmind/vm_models/input.py +6 -5
- validmind/vm_models/model.py +5 -5
- validmind/vm_models/result/result.py +122 -43
- validmind/vm_models/result/utils.py +9 -28
- validmind/vm_models/test_suite/__init__.py +5 -0
- validmind/vm_models/test_suite/runner.py +5 -5
- validmind/vm_models/test_suite/summary.py +20 -2
- validmind/vm_models/test_suite/test.py +6 -6
- validmind/vm_models/test_suite/test_suite.py +10 -10
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/METADATA +4 -5
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/RECORD +189 -188
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/WHEEL +1 -1
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/LICENSE +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.20.dist-info}/entry_points.txt +0 -0
validmind/client.py
CHANGED
@@ -6,8 +6,12 @@
 Client interface for all data and model validation functions
 """

+from typing import Any, Callable, Dict, List, Optional, Union
+
+import numpy as np
 import pandas as pd
 import polars as pl
+import torch

 from .api_client import log_input as log_input
 from .client_config import client_config
@@ -42,20 +46,22 @@ logger = get_logger(__name__)


 def init_dataset(
-    dataset
-
-
-
+    dataset: Union[
+        pd.DataFrame, pl.DataFrame, "np.ndarray", "torch.utils.data.TensorDataset"
+    ],
+    model: Optional[VMModel] = None,
+    index: Optional[Any] = None,
+    index_name: Optional[str] = None,
     date_time_index: bool = False,
-    columns:
-    text_column: str = None,
-    target_column: str = None,
-    feature_columns:
-    extra_columns:
-    class_labels:
-    type: str = None,
-    input_id: str = None,
-    __log=True,
+    columns: Optional[List[str]] = None,
+    text_column: Optional[str] = None,
+    target_column: Optional[str] = None,
+    feature_columns: Optional[List[str]] = None,
+    extra_columns: Optional[Dict[str, Any]] = None,
+    class_labels: Optional[Dict[str, Any]] = None,
+    type: Optional[str] = None,
+    input_id: Optional[str] = None,
+    __log: bool = True,
 ) -> VMDataset:
     """
     Initializes a VM Dataset, which can then be passed to other functions
@@ -69,25 +75,30 @@ def init_dataset(
     - Torch TensorDataset

     Args:
-        dataset
-        model (VMModel): ValidMind model object
-
-
-
-
-
-
-
-
+        dataset: Dataset from various Python libraries.
+        model (VMModel): ValidMind model object.
+        index (Any, optional): Index for the dataset.
+        index_name (str, optional): Name of the index column.
+        date_time_index (bool): Whether the index is a datetime index.
+        columns (List[str], optional): List of column names.
+        text_column (str, optional): Name of the text column.
+        target_column (str, optional): The name of the target column in the dataset.
+        feature_columns (List[str], optional): A list of names of feature columns in the dataset.
+        extra_columns (Dict[str, Any], optional): A dictionary containing the names of the
+            prediction_column and group_by_columns in the dataset.
+        class_labels (Dict[str, Any], optional): A list of class labels for classification problems.
+        type (str, optional): The type of dataset (one of DATASET_TYPES) - DEPRECATED.
+        input_id (str, optional): The input ID for the dataset (e.g. "my_dataset"). By default,
             this will be set to `dataset` but if you are passing this dataset as a
             test input using some other key than `dataset`, then you should set
             this to the same key.
+        __log (bool): Whether to log the input. Defaults to True.

     Raises:
-        ValueError: If the dataset type is not supported
+        ValueError: If the dataset type is not supported.

     Returns:
-        vm.vm.Dataset: A VM Dataset instance
+        vm.vm.Dataset: A VM Dataset instance.
     """
     # Show deprecation notice if type is passed
     if type is not None:
@@ -171,12 +182,12 @@ def init_dataset(


 def init_model(
-    model: object = None,
+    model: Optional[object] = None,
     input_id: str = "model",
-    attributes:
-    predict_fn:
-    __log=True,
-    **kwargs,
+    attributes: Optional[Dict[str, Any]] = None,
+    predict_fn: Optional[Callable] = None,
+    __log: bool = True,
+    **kwargs: Any,
 ) -> VMModel:
     """
     Initializes a VM Model, which can then be passed to other functions
@@ -184,35 +195,21 @@ def init_model(
     also ensures we are creating a model supported libraries.

     Args:
-        model: A trained model or VMModel instance
+        model: A trained model or VMModel instance.
         input_id (str): The input ID for the model (e.g. "my_model"). By default,
             this will be set to `model` but if you are passing this model as a
             test input using some other key than `model`, then you should set
             this to the same key.
-        attributes (dict): A dictionary of model attributes
-        predict_fn (callable): A function that takes an input and returns a prediction
-        **kwargs: Additional arguments to pass to the model
+        attributes (dict): A dictionary of model attributes.
+        predict_fn (callable): A function that takes an input and returns a prediction.
+        **kwargs: Additional arguments to pass to the model.

     Raises:
-        ValueError: If the model type is not supported
+        ValueError: If the model type is not supported.

     Returns:
-        vm.VMModel: A VM Model instance
+        vm.VMModel: A VM Model instance.
     """
-    # vm_model = model if isinstance(model, VMModel) else None
-    # metadata = None
-
-    # if not vm_model:
-    #     class_obj = get_model_class(model=model, predict_fn=predict_fn)
-    #     if not class_obj:
-    #         if not attributes:
-    #             raise UnsupportedModelError(
-    #                 f"Model class {str(model.__class__)} is not supported at the moment."
-    #             )
-    #         elif not is_model_metadata(attributes):
-    #             raise UnsupportedModelError(
-    #                 f"Model attributes {str(attributes)} are missing required keys 'architecture' and 'language'."
-    #             )
     vm_model = model if isinstance(model, VMModel) else None
     class_obj = get_model_class(model=model, predict_fn=predict_fn)

@@ -276,26 +273,18 @@ def init_r_model(
     input_id: str = "model",
 ) -> VMModel:
     """
-
-
-    R models must be saved to disk and the filetype depends on the model type...
-    Currently we support the following model types:
-
-    - LogisticRegression `glm` model in R: saved as an RDS file with `saveRDS`
-    - LinearRegression `lm` model in R: saved as an RDS file with `saveRDS`
-    - XGBClassifier: saved as a .json or .bin file with `xgb.save`
-    - XGBRegressor: saved as a .json or .bin file with `xgb.save`
+    Initialize a VM Model from an R model.

     LogisticRegression and LinearRegression models are converted to sklearn models by extracting
     the coefficients and intercept from the R model. XGB models are loaded using the xgboost
-    since xgb models saved in .json or .bin format can be loaded directly with either Python or R
+    since xgb models saved in .json or .bin format can be loaded directly with either Python or R.

     Args:
-        model_path (str): The path to the R model saved as an RDS or XGB file
-
+        model_path (str): The path to the R model saved as an RDS or XGB file.
+        input_id (str): The input ID for the model. Defaults to "model".

     Returns:
-
+        VMModel: A VM Model instance.
     """

     # TODO: proper check for supported models
@@ -329,12 +318,12 @@ def init_r_model(


 def get_test_suite(
-    test_suite_id: str = None,
-    section: str = None,
-    *args,
-    **kwargs,
+    test_suite_id: Optional[str] = None,
+    section: Optional[str] = None,
+    *args: Any,
+    **kwargs: Any,
 ) -> TestSuite:
-    """Gets a TestSuite object for the current project or a specific test suite
+    """Gets a TestSuite object for the current project or a specific test suite.

     This function provides an interface to retrieve the TestSuite instance for the
     current project or a specific TestSuite instance identified by test_suite_id.
@@ -348,8 +337,11 @@ def get_test_suite(
         section (str, optional): The section of the documentation template from which
             to retrieve the test suite. This only applies if test_suite_id is None.
             Defaults to None.
-        args: Additional arguments to pass to the TestSuite
-        kwargs: Additional keyword arguments to pass to the TestSuite
+        args: Additional arguments to pass to the TestSuite.
+        kwargs: Additional keyword arguments to pass to the TestSuite.
+
+    Returns:
+        TestSuite: The TestSuite instance.
     """
     if test_suite_id is None:
         if client_config.documentation_template is None:
@@ -365,31 +357,36 @@ def get_test_suite(


 def run_test_suite(
-    test_suite_id
-
-
+    test_suite_id: str,
+    send: bool = True,
+    fail_fast: bool = False,
+    config: Optional[Dict[str, Any]] = None,
+    inputs: Optional[Dict[str, Any]] = None,
+    **kwargs: Any,
+) -> TestSuite:
+    """High Level function for running a test suite.

     This function provides a high level interface for running a test suite. A test suite is
     a collection of tests. This function will automatically find the correct test suite
     class based on the test_suite_id, initialize each of the tests, and run them.

     Args:
-        test_suite_id (str): The test suite name
+        test_suite_id (str): The test suite name. For example, 'classifier_full_suite'.
         config (dict, optional): A dictionary of parameters to pass to the tests in the
             test suite. Defaults to None.
         send (bool, optional): Whether to post the test results to the API. send=False
             is useful for testing. Defaults to True.
         fail_fast (bool, optional): Whether to stop running tests after the first failure. Defaults to False.
-        inputs (dict, optional): A dictionary of test inputs to pass to the TestSuite
-        `models
-            documentation or `vm.describe_test()` for more details on the inputs required for each.
-        **kwargs: backwards compatibility for passing in test inputs using keyword arguments
+        inputs (dict, optional): A dictionary of test inputs to pass to the TestSuite, such as `model`, `dataset`
+            `models`, etc. These inputs will be accessible by any test in the test suite. See the test
+            documentation or `vm.describe_test()` for more details on the inputs required for each. Defaults to None.
+        **kwargs: backwards compatibility for passing in test inputs using keyword arguments.

     Raises:
-        ValueError: If the test suite name is not found or if there is an error initializing the test suite
+        ValueError: If the test suite name is not found or if there is an error initializing the test suite.

     Returns:
-        TestSuite:
+        TestSuite: The TestSuite instance.
     """
     try:
         Suite: TestSuite = get_test_suite_by_id(test_suite_id)
@@ -414,14 +411,14 @@ def run_test_suite(
     return suite


-def preview_template():
-    """Preview the documentation template for the current project
+def preview_template() -> None:
+    """Preview the documentation template for the current project.

     This function will display the documentation template for the current project. If
     the project has not been initialized, then an error will be raised.

     Raises:
-        ValueError: If the project has not been initialized
+        ValueError: If the project has not been initialized.
     """
     if client_config.documentation_template is None:
         raise MissingDocumentationTemplate(
@@ -432,9 +429,14 @@ def preview_template():


 def run_documentation_tests(
-    section
-
-
+    section: Optional[str] = None,
+    send: bool = True,
+    fail_fast: bool = False,
+    inputs: Optional[Dict[str, Any]] = None,
+    config: Optional[Dict[str, Any]] = None,
+    **kwargs: Any,
+) -> Union[TestSuite, Dict[str, TestSuite]]:
+    """Collect and run all the tests associated with a template.

     This function will analyze the current project's documentation template and collect
     all the tests associated with it into a test suite. It will then run the test
@@ -444,15 +446,15 @@ def run_documentation_tests(
         section (str or list, optional): The section(s) to preview. Defaults to None.
         send (bool, optional): Whether to send the results to the ValidMind API. Defaults to True.
         fail_fast (bool, optional): Whether to stop running tests after the first failure. Defaults to False.
-        inputs (dict, optional): A dictionary of test inputs to pass to the TestSuite
-        config: A dictionary of test parameters to override the defaults
-        **kwargs: backwards compatibility for passing in test inputs using keyword arguments
+        inputs (dict, optional): A dictionary of test inputs to pass to the TestSuite.
+        config: A dictionary of test parameters to override the defaults.
+        **kwargs: backwards compatibility for passing in test inputs using keyword arguments.

     Returns:
         TestSuite or dict: The completed TestSuite instance or a dictionary of TestSuites if section is a list.

     Raises:
-        ValueError: If the project has not been initialized
+        ValueError: If the project has not been initialized.
     """
     if client_config.documentation_template is None:
         raise MissingDocumentationTemplate(
@@ -487,24 +489,30 @@ def run_documentation_tests(


 def _run_documentation_section(
-    template
-
-
+    template: str,
+    section: str,
+    send: bool = True,
+    fail_fast: bool = False,
+    config: Optional[Dict[str, Any]] = None,
+    inputs: Optional[Dict[str, Any]] = None,
+    **kwargs: Any,
+) -> TestSuite:
+    """Run all tests in a template section.

     This function will collect all tests used in a template section into a TestSuite and then
     run the TestSuite as usual.

     Args:
-        template: A valid flat template
-        section: The section of the template to run (if not provided, run all sections)
-        send: Whether to send the results to the ValidMind API
+        template: A valid flat template.
+        section: The section of the template to run (if not provided, run all sections).
+        send: Whether to send the results to the ValidMind API.
         fail_fast (bool, optional): Whether to stop running tests after the first failure. Defaults to False.
-        config: A dictionary of test parameters to override the defaults
-        inputs: A dictionary of test inputs to pass to the TestSuite
-        **kwargs: backwards compatibility for passing in test inputs using keyword arguments
+        config: A dictionary of test parameters to override the defaults.
+        inputs: A dictionary of test inputs to pass to the TestSuite.
+        **kwargs: backwards compatibility for passing in test inputs using keyword arguments.

     Returns:
-        The completed TestSuite instance
+        The completed TestSuite instance.
     """
     test_suite = get_template_test_suite(template, section)

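The 2.8.20 changes above only add type annotations and docstring detail; the calling pattern is unchanged. Below is a minimal usage sketch of the annotated `init_dataset` / `init_model` / `run_documentation_tests` API. It assumes `vm.init()` has already been called with valid credentials and a documentation template; the toy DataFrame, the XGBoost model, and the "model_development" section ID are illustrative placeholders, not part of this diff.

    import pandas as pd
    import xgboost as xgb

    import validmind as vm

    # Assumes vm.init(...) was already called with valid credentials.

    df = pd.DataFrame(
        {
            "loan_amnt": [1000, 2000, 1500, 3000],
            "term": [36, 60, 36, 60],
            "default": [0, 1, 0, 1],
        }
    )
    model = xgb.XGBClassifier(n_estimators=5).fit(df[["loan_amnt", "term"]], df["default"])

    # init_dataset now advertises pd.DataFrame, pl.DataFrame, np.ndarray, or a Torch
    # TensorDataset for `dataset`, and Optional[...] types for the remaining arguments.
    vm_ds = vm.init_dataset(
        dataset=df,
        target_column="default",
        feature_columns=["loan_amnt", "term"],
        input_id="train_dataset",
    )
    vm_model = vm.init_model(model=model, input_id="xgb_model")

    # Collect and run the tests of one template section; send=False keeps results local.
    results = vm.run_documentation_tests(
        section="model_development",  # placeholder section ID
        inputs={"model": vm_model, "dataset": vm_ds},
        send=False,
    )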
validmind/client_config.py
CHANGED
@@ -13,7 +13,7 @@ from dataclasses import dataclass
 @dataclass
 class ClientConfig:
     """
-    Configuration class for the ValidMind API client. This is instantiated
+    Configuration class for the ValidMind API client. This class is instantiated
     when initializing the API client.
     """

@@ -25,7 +25,7 @@ class ClientConfig:

     def __post_init__(self):
         """
-        Set additional attributes when initializing the class
+        Set additional attributes when initializing the class.
         """
         # check if running on notebook and set running_on_colab
         try:
@@ -36,7 +36,7 @@ class ClientConfig:
             self.running_on_colab = False

     def can_generate_llm_test_descriptions(self):
-        """Returns True if the client can generate LLM
+        """Returns True if the client can generate LLM-based test descriptions."""
         return self.feature_flags.get("llm_test_descriptions", True)


validmind/datasets/classification/__init__.py
CHANGED
@@ -5,6 +5,8 @@
 """
 Entrypoint for classification datasets.
 """
+from typing import List
+
 import pandas as pd

 __all__ = [
@@ -13,7 +15,7 @@ __all__ = [
 ]


-def simple_preprocess_booleans(df, columns):
+def simple_preprocess_booleans(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
     """
     Preprocess boolean columns.

@@ -36,7 +38,9 @@ def simple_preprocess_booleans(df, columns):
     return df


-def simple_preprocess_categoricals(df, columns):
+def simple_preprocess_categoricals(
+    df: pd.DataFrame, columns: List[str]
+) -> pd.DataFrame:
     """
     Preprocess categorical columns.

@@ -56,7 +60,7 @@ def simple_preprocess_categoricals(df, columns):
     return df


-def simple_preprocess_numericals(df, columns):
+def simple_preprocess_numericals(df: pd.DataFrame, columns: List[str]) -> pd.DataFrame:
     """
     Preprocess numerical columns.

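A small usage sketch of the newly annotated preprocessing helpers, assuming they remain importable from the classification datasets entrypoint shown above; the toy DataFrame and column names are made up for illustration, and the exact transformations each helper applies are not part of this diff.

    import pandas as pd

    from validmind.datasets.classification import (
        simple_preprocess_booleans,
        simple_preprocess_categoricals,
        simple_preprocess_numericals,
    )

    # Illustrative data only; each helper takes a DataFrame and the list of columns to treat.
    df = pd.DataFrame(
        {
            "active": [True, False, True],
            "region": ["north", "south", "north"],
            "balance": [120.5, None, 98.0],
        }
    )

    df = simple_preprocess_booleans(df, columns=["active"])
    df = simple_preprocess_categoricals(df, columns=["region"])
    df = simple_preprocess_numericals(df, columns=["balance"])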
validmind/datasets/credit_risk/lending_club.py
CHANGED
@@ -5,6 +5,7 @@
 import logging
 import os
 import warnings
+from typing import Any, Dict, Optional, Tuple

 import numpy as np
 import pandas as pd
@@ -101,12 +102,15 @@ score_params = {
 }


-def load_data(source="online", verbose=True):
+def load_data(source: str = "online", verbose: bool = True) -> pd.DataFrame:
     """
     Load data from either an online source or offline files, automatically dropping specified columns for offline data.

-    :
-
+    Args:
+        source: 'online' for online data, 'offline' for offline files. Defaults to 'online'.
+
+    Returns:
+        DataFrame: DataFrame containing the loaded data.
     """

     if source == "online":
@@ -136,7 +140,7 @@ def load_data(source="online", verbose=True):
     return df


-def _clean_data(df, verbose=True):
+def _clean_data(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
     df = df.copy()

     # Drop columns not relevant for application scorecards
@@ -182,7 +186,7 @@ def _clean_data(df, verbose=True):
     return df


-def preprocess(df, verbose=True):
+def preprocess(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
     df = df.copy()

     # Convert the target variable to integer type for modeling.
@@ -245,7 +249,7 @@ def preprocess(df, verbose=True):
     return df


-def _preprocess_term(df):
+def _preprocess_term(df: pd.DataFrame) -> pd.DataFrame:
     df = df.copy()

     # Remove ' months' and convert to integer
@@ -254,7 +258,7 @@ def _preprocess_term(df):
     return df


-def _preprocess_emp_length(df):
+def _preprocess_emp_length(df: pd.DataFrame) -> pd.DataFrame:
     df = df.copy()

     # Mapping string values to numbers
@@ -281,7 +285,7 @@ def _preprocess_emp_length(df):
     return df


-def feature_engineering(df, verbose=True):
+def feature_engineering(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
     df = df.copy()

     # WoE encoding of numerical and categorical features
@@ -295,7 +299,7 @@ def feature_engineering(df, verbose=True):
     return df


-def woe_encoding(df, verbose=True):
+def woe_encoding(df: pd.DataFrame, verbose: bool = True) -> pd.DataFrame:
     df = df.copy()

     woe = _woebin(df, verbose=verbose)
@@ -316,7 +320,7 @@ def woe_encoding(df, verbose=True):
     return df


-def _woe_to_bins(woe):
+def _woe_to_bins(woe: Dict[str, Any]) -> Dict[str, Any]:
     # Select and rename columns
     transformed_df = woe[
         [
@@ -350,7 +354,7 @@ def _woe_to_bins(woe):
     return bins


-def _woebin(df, verbose=True):
+def _woebin(df: pd.DataFrame, verbose: bool = True) -> Dict[str, Any]:
     """
     This function performs automatic binning using WoE.
     df: A pandas dataframe
@@ -380,7 +384,13 @@ def _woebin(df, verbose=True):
     return bins_df


-def split(df, validation_size=None, test_size=0.2, add_constant=False, verbose=True):
+def split(
+    df: pd.DataFrame,
+    validation_split: Optional[float] = None,
+    test_size: float = 0.2,
+    add_constant: bool = False,
+    verbose: bool = True,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
     """
     Split dataset into train, validation (optional), and test sets.

@@ -404,7 +414,7 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False, verbose=T
     if add_constant:
         test_df = sm.add_constant(test_df)

-    if validation_size is None:
+    if validation_split is None:
         if add_constant:
             train_val_df = sm.add_constant(train_val_df)

@@ -423,7 +433,7 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False, verbose=T
         return train_val_df, test_df

     # Calculate validation size as proportion of remaining data
-    val_size = validation_size / (1 - test_size)
+    val_size = validation_split / (1 - test_size)
     train_df, validation_df = train_test_split(
         train_val_df, test_size=val_size, random_state=42
     )
@@ -451,7 +461,7 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False, verbose=T
     return train_df, validation_df, test_df


-def compute_scores(probabilities):
+def compute_scores(probabilities: np.ndarray) -> np.ndarray:
     target_score = score_params["target_score"]
     target_odds = score_params["target_odds"]
     pdo = score_params["pdo"]
@@ -465,7 +475,9 @@ def compute_scores(probabilities):
     return scores


-def get_demo_test_config(
+def get_demo_test_config(
+    x_test: Optional[np.ndarray] = None, y_test: Optional[np.ndarray] = None
+) -> Dict[str, Any]:
     """Get demo test configuration.

     Args:
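The renamed `validation_split` parameter is expressed as a fraction of the full dataset, while the second `train_test_split` call only sees the rows left over after the test split, hence the rescaling `val_size = validation_split / (1 - test_size)` shown in the hunk above. A quick arithmetic check (values are illustrative):

    test_size = 0.2         # 20% of all rows go to the test set
    validation_split = 0.1  # want 10% of all rows in the validation set

    remaining = 1 - test_size                # 0.8 of the rows stay in train+validation
    val_size = validation_split / remaining  # 0.125 of the remaining rows

    # 12.5% of the remaining 80% is exactly the requested 10% of the full dataset.
    assert abs(remaining * val_size - validation_split) < 1e-12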
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
import os
|
6
6
|
import textwrap
|
7
|
+
from typing import Optional, Tuple
|
7
8
|
|
8
9
|
import pandas as pd
|
9
10
|
from datasets import load_dataset
|
@@ -22,13 +23,18 @@ current_path = os.path.dirname(os.path.abspath(__file__))
|
|
22
23
|
dataset_path = os.path.join(current_path, "datasets")
|
23
24
|
|
24
25
|
|
25
|
-
def load_data(
|
26
|
+
def load_data(
|
27
|
+
source: str = "online", dataset_size: Optional[str] = None
|
28
|
+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
|
26
29
|
"""
|
27
30
|
Load data from either online source or offline files.
|
28
31
|
|
29
|
-
:
|
30
|
-
|
31
|
-
|
32
|
+
Args:
|
33
|
+
source: 'online' for online data, 'offline' for offline data. Defaults to 'online'.
|
34
|
+
dataset_size: Applicable if source is 'offline'. '300k' or '500k' for dataset size. Defaults to None.
|
35
|
+
|
36
|
+
Returns:
|
37
|
+
Tuple containing (train_df, test_df) DataFrames with the loaded data.
|
32
38
|
"""
|
33
39
|
if source == "online":
|
34
40
|
# Load online data without predictions
|
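Based on the annotated signature and docstring above, a minimal usage sketch; the online path loads the data through the `datasets` library imported in this module, so it needs network access.

    from validmind.datasets.nlp import cnn_dailymail

    # source="online" pulls the data remotely; dataset_size ('300k' or '500k')
    # only applies when source="offline".
    train_df, test_df = cnn_dailymail.load_data(source="online")
    print(train_df.shape, test_df.shape)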
validmind/datasets/regression/__init__.py
CHANGED
@@ -5,20 +5,25 @@
 """
 Entrypoint for regression datasets
 """
+from typing import List
+
 import pandas as pd

-__all__ = [
+__all__: List[str] = [
     "fred",
     "lending_club",
 ]


-def identify_frequencies(df):
+def identify_frequencies(df: pd.DataFrame) -> pd.DataFrame:
     """
     Identify the frequency of each series in the DataFrame.

-    :
-
+    Args:
+        df: Time-series DataFrame.
+
+    Returns:
+        DataFrame with two columns: "Variable" and "Frequency".
     """
     frequencies = []
     for column in df.columns:
@@ -36,7 +41,19 @@ def identify_frequencies(df):
     return freq_df


-def resample_to_common_frequency(
+def resample_to_common_frequency(
+    df: pd.DataFrame, common_frequency: str = "MS"
+) -> pd.DataFrame:
+    """
+    Resample time series data to a common frequency.
+
+    Args:
+        df: Time-series DataFrame.
+        common_frequency: Target frequency for resampling. Defaults to "MS" (month start).
+
+    Returns:
+        DataFrame with data resampled to the common frequency.
+    """
     # Make sure the index is a datetime index
     if not isinstance(df.index, pd.DatetimeIndex):
         df.index = pd.to_datetime(df.index)
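A short sketch of the two annotated helpers, assuming they remain importable from the regression datasets entrypoint shown above; the toy series and column names are illustrative only.

    import numpy as np
    import pandas as pd

    from validmind.datasets.regression import (
        identify_frequencies,
        resample_to_common_frequency,
    )

    # Illustrative monthly data with a datetime index.
    idx = pd.date_range("2020-01-01", periods=12, freq="MS")
    df = pd.DataFrame(
        {"gdp": np.linspace(100.0, 111.0, 12), "rate": np.linspace(1.0, 2.1, 12)},
        index=idx,
    )

    freq_df = identify_frequencies(df)          # columns: "Variable", "Frequency"
    monthly = resample_to_common_frequency(df)  # defaults to "MS" (month start)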