PyPI - validmind - Versions diffs - 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl - Mend

validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (233) hide show

validmind/api_client.py CHANGED Viewed

@@ -186,12 +186,24 @@ def __ping() -> Dict[str, Any]:
     client_config.project = client_info["project"]
     client_config.documentation_template = client_info.get("documentation_template", {})
     client_config.feature_flags = client_info.get("feature_flags", {})
+    client_config.model = client_info.get("model", {})
+    client_config.document_type = client_info.get(
+        "document_type", "model_documentation"
+    )
     if ack_connected:
-        logger.info(
-            f"Connected to ValidMind... Current Model: {client_config.project['name']}"
-            f" ({client_config.project['cuid']})"
-        )
+        if client_config.model:
+            logger.info(
+                f"🎉 Connected to ValidMind!\n"
+                f"📊 Model: {client_config.model.get('name', 'N/A')} "
+                f"(ID: {client_config.model.get('cuid', 'N/A')})\n"
+                f"📁 Document Type: {client_config.document_type}"
+            )
+        else:
+            logger.info(
+                f"Connected to ValidMind... Current Model: {client_config.project['name']}"
+                f" ({client_config.project['cuid']})"
+            )
 def reload():
@@ -331,32 +343,6 @@ async def log_figures(figures: List[Figure]) -> Dict[str, Any]:
     Returns:
         dict: The response from the API
     """
-    # this actually slows things down - better to log them in parallel
-    # if client_config.can_log_figures():  # check if the backend supports batch logging
-    #     try:
-    #         data = {}
-    #         files = {}
-    #         for figure in figures:
-    #             data.update(
-    #                 {f"{k}-{figure.key}": v for k, v in figure.serialize().items()}
-    #             )
-    #             files.update(
-    #                 {
-    #                     f"{k}-{figure.key}": v
-    #                     for k, v in figure.serialize_files().items()
-    #                 }
-    #             )
-    #         return await _post(
-    #             "log_figures",
-    #             data=data,
-    #             files=files,
-    #         )
-    #     except Exception as e:
-    #         logger.error("Error logging figures to ValidMind API")
-    #         raise e
-    # else:
     return await asyncio.gather(*[log_figure(figure) for figure in figures])
@@ -416,11 +402,11 @@ async def log_metrics(
     Returns:
         dict: The response from the API
     """
-    params = {}
+    request_params = {}
     if section_id:
-        params["section_id"] = section_id
+        request_params["section_id"] = section_id
     if position is not None:
-        params["position"] = position
+        request_params["position"] = position
     data = []
@@ -430,7 +416,7 @@ async def log_metrics(
             "inputs": inputs,
         }
-        if output_template and client_config.can_log_output_template():
+        if output_template:
             metric_data["output_template"] = output_template
         data.append(metric_data)
@@ -438,7 +424,7 @@ async def log_metrics(
     try:
         return await _post(
             "log_metrics",
-            params=params,
+            params=request_params,
             data=json.dumps(data, cls=NumpyEncoder, allow_nan=False),
         )
     except Exception as e:
@@ -469,16 +455,16 @@ async def log_test_result(
     Returns:
         dict: The response from the API
     """
-    params = {}
+    request_params = {}
     if section_id:
-        params["section_id"] = section_id
+        request_params["section_id"] = section_id
     if position is not None:
-        params["position"] = position
+        request_params["position"] = position
     try:
         return await _post(
             "log_test_results",
-            params=params,
+            params=request_params,
             data=json.dumps(
                 {
                     **result.serialize(),
@@ -503,7 +489,7 @@ def log_test_results(
     Args:
         results (list): A list of ThresholdTestResults objects
-        inputs (list): A list of input keys (names) that were used to run the test
+        inputs (list): A list of input IDs that were used to run the test
     Raises:
         Exception: If the API call fails
@@ -522,11 +508,11 @@ def log_test_results(
     return responses
-def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
+def log_input(input_id: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
     """Logs input information - internal use for now (don't expose via public API)
     Args:
-        name (str): The name of the input
+        input_id (str): The input_id of the input
         type (str): The type of the input
         metadata (dict): The metadata of the input
@@ -542,7 +528,7 @@ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
             "log_input",
             data=json.dumps(
                 {
-                    "name": name,
+                    "name": input_id,
                     "type": type,
                     "metadata": metadata,
                 },
@@ -555,6 +541,66 @@ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
         raise e
+async def alog_metric(
+    key: str,
+    value: float,
+    inputs: Optional[List[str]] = None,
+    params: Optional[Dict[str, Any]] = None,
+    recorded_at: Optional[str] = None,
+) -> None:
+    """See log_metric for details"""
+    if not key or not isinstance(key, str):
+        raise ValueError("`key` must be a non-empty string")
+    if not value or not isinstance(value, (int, float)):
+        raise ValueError("`value` must be a scalar (int or float)")
+    try:
+        return await _post(
+            "log_unit_metric",
+            data=json.dumps(
+                {
+                    "key": key,
+                    "value": value,
+                    "inputs": inputs or [],
+                    "params": params or {},
+                    "recorded_at": recorded_at,
+                },
+                cls=NumpyEncoder,
+                allow_nan=False,
+            ),
+        )
+    except Exception as e:
+        logger.error("Error logging metric to ValidMind API")
+        raise e
+def log_metric(
+    key: str,
+    value: float,
+    inputs: Optional[List[str]] = None,
+    params: Optional[Dict[str, Any]] = None,
+    recorded_at: Optional[str] = None,
+) -> None:
+    """Logs a unit metric
+    Unit metrics are key-value pairs where the key is the metric name and the value is
+    a scalar (int or float). These key-value pairs are associated with the currently
+    selected model (inventory model in the ValidMind Platform) and keys can be logged
+    to over time to create a history of the metric. On the platform, these metrics
+    will be used to create plots/visualizations for documentation and dashboards etc.
+    Args:
+        key (str): The metric key
+        value (float): The metric value
+        inputs (list, optional): A list of input IDs that were used to compute the metric.
+        params (dict, optional): Dictionary of parameters used to compute the metric.
+        recorded_at (str, optional): The timestamp of the metric. Server will use
+            current time if not provided.
+    """
+    run_async(alog_metric, key, value, inputs, params, recorded_at)
 def start_run() -> str:
     """Starts a new test run

validmind/client.py CHANGED Viewed

@@ -164,7 +164,7 @@ def init_dataset(
     if __log:
         log_input(
-            name=input_id,
+            input_id=input_id,
             type="dataset",
             metadata=get_dataset_info(vm_dataset),
         )
@@ -265,7 +265,7 @@ def init_model(
     if __log:
         log_input(
-            name=input_id,
+            input_id=input_id,
             type="model",
             metadata=metadata,
         )

validmind/client_config.py CHANGED Viewed

@@ -18,7 +18,9 @@ class ClientConfig:
     """
     project: object
+    model: object
     feature_flags: dict
+    document_type: str
     documentation_template: object
     running_on_colab: bool = False
@@ -34,21 +36,16 @@ class ClientConfig:
         except ImportError:
             self.running_on_colab = False
-    def is_json_plots_enabled(self):
-        """
-        Returns True if the JSON plots feature flag is enabled on the backend
-        """
-        return self.feature_flags.get("generate_json_plots", False)
-    def can_log_figures(self):
-        """Returns True if the client can log figures to the API"""
-        return self.feature_flags.get("log_figures", False)
-    def can_log_output_template(self):
-        """Returns True if the client can log output templates to the API"""
-        return self.feature_flags.get("output_templates", False)
+    def can_generate_llm_test_descriptions(self):
+        """Returns True if the client can generate LLM based test descriptions"""
+        return self.feature_flags.get("llm_test_descriptions", True)
 client_config = ClientConfig(
-    project=None, feature_flags={}, documentation_template=None
+    project=None,
+    model=None,
+    feature_flags={},
+    document_type="model_documentation",
+    documentation_template=None,
+    running_on_colab=False,
 )

validmind/datasets/credit_risk/__init__.py CHANGED Viewed

@@ -8,4 +8,5 @@ Entrypoint for credit risk datasets.
 __all__ = [
     "lending_club",
+    "lending_club_bias",
 ]

validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz ADDED Viewed

Binary file

validmind/datasets/credit_risk/lending_club_bias.py ADDED Viewed

@@ -0,0 +1,142 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+import os
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+# Directory containing this module; the data files live in its "datasets" subdirectory
+current_path = os.path.dirname(os.path.abspath(__file__))
+dataset_path = os.path.join(current_path, "datasets")
+# Local path of the gzip-compressed CSV that load_data() reads
+data_file = os.path.join(dataset_path, "lending_club_biased.csv.gz")
+# Target column: rows missing it are dropped in _clean_data() and it is cast to int in preprocess()
+target_column = "loan_status"
+# Column names of the protected attributes (not referenced by the functions in this module)
+protected_classes = ["Gender", "Race", "Marital_Status"]
+# Columns removed from the raw data by _clean_data()
+drop_columns = ["total_pymnt", "id", "verification_status", "purpose"]
+# Scaling constants read by compute_scores()
+# NOTE(review): "pdo" presumably means "points to double the odds" - confirm
+score_params = {
+    "target_score": 600,
+    "target_odds": 50,
+    "pdo": 20,
+}
+def load_data():
+    """
+    Load data from the specified CSV file.
+    :return: DataFrame containing the loaded data.
+    """
+    print(f"Loading data from: {data_file}")
+    # Since we know the offline_data_file path ends with '.zip', we replace it with '.csv.gz'
+    gzip_file_path = data_file.replace(".zip", ".csv.gz")
+    # Read the CSV file directly from the .gz archive
+    df = pd.read_csv(gzip_file_path, compression="gzip")
+    print("Data loaded successfully.")
+    df = _clean_data(df)
+    return df
+def _clean_data(df):
+    df = df.copy()
+    print("Loading the raw dataset:")
+    print(
+        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
+    )
+    # Drop columns not relevant for this model
+    print(f"Dropping columns not relevant for this model: {drop_columns}")
+    df = df.drop(columns=drop_columns)
+    print(
+        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
+    )
+    # Drop rows with missing target values
+    df.dropna(subset=[target_column], inplace=True)
+    print("Dropping rows with missing target values:")
+    print(
+        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
+    )
+    # Drop columns with more than N percent missing values
+    missing_values = df.isnull().mean()
+    df = df.loc[:, missing_values < 0.7]
+    print("Dropping columns with more than 70% missing values:")
+    print(
+        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
+    )
+    # Drop columns with only one unique value
+    unique_values = df.nunique()
+    df = df.loc[:, unique_values > 1]
+    print("Dropping columns with only one unique value:")
+    print(
+        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
+    )
+    return df
+def preprocess(df):
+    df = df.copy()
+    # Convert the target variable to integer type for modeling.
+    df[target_column] = df[target_column].astype(int)
+    # Identify and encode categorical variables for modeling purposes
+    label_encoders = {}
+    categorical_columns = df.select_dtypes(include=["object"]).columns
+    for column in categorical_columns:
+        le = LabelEncoder()
+        df[f"{column}_encoded"] = le.fit_transform(df[column])
+        label_encoders[column] = le
+        df = df.drop(columns=[column])  # Remove the original column
+    print(f"Encoding categorical variables: {list(categorical_columns)}")
+    print(
+        f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
+    )
+    return df
+def split(df, test_size=0.3):
+    df = df.copy()
+    # Splitting the dataset into training and test sets
+    train_df, test_df = train_test_split(df, test_size=test_size, random_state=42)
+    # Calculate and print details for the training dataset
+    print(
+        f"Training Dataset:\nRows: {train_df.shape[0]}\nColumns: {train_df.shape[1]}\nMissing values: {train_df.isnull().sum().sum()}\n"
+    )
+    # Calculate and print details for the test dataset
+    print(
+        f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\nMissing values: {test_df.isnull().sum().sum()}\n"
+    )
+    return train_df, test_df
+def compute_scores(probabilities):
+    target_score = score_params["target_score"]
+    target_odds = score_params["target_odds"]
+    pdo = score_params["pdo"]
+    factor = pdo / np.log(2)
+    offset = target_score - (factor * np.log(target_odds))
+    scores = offset + factor * np.log(probabilities / (1 - probabilities))
+    return scores

validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl