PyPI - workbench - Versions diffs - 0.8.202__py3-none-any.whl → 0.8.220__py3-none-any.whl - Mend

workbench 0.8.202__py3-none-any.whl → 0.8.220__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of workbench might be problematic. Click here for more details.

Files changed (84) hide show

workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
workbench/algorithms/dataframe/fingerprint_proximity.py +421 -85
workbench/algorithms/dataframe/projection_2d.py +44 -21
workbench/algorithms/dataframe/proximity.py +78 -150
workbench/algorithms/graph/light/proximity_graph.py +5 -5
workbench/algorithms/models/cleanlab_model.py +382 -0
workbench/algorithms/models/noise_model.py +388 -0
workbench/algorithms/sql/outliers.py +3 -3
workbench/api/__init__.py +3 -0
workbench/api/df_store.py +17 -108
workbench/api/endpoint.py +13 -11
workbench/api/feature_set.py +111 -8
workbench/api/meta_model.py +289 -0
workbench/api/model.py +45 -12
workbench/api/parameter_store.py +3 -52
workbench/cached/cached_model.py +4 -4
workbench/core/artifacts/artifact.py +5 -5
workbench/core/artifacts/df_store_core.py +114 -0
workbench/core/artifacts/endpoint_core.py +228 -237
workbench/core/artifacts/feature_set_core.py +185 -230
workbench/core/artifacts/model_core.py +34 -26
workbench/core/artifacts/parameter_store_core.py +98 -0
workbench/core/pipelines/pipeline_executor.py +1 -1
workbench/core/transforms/features_to_model/features_to_model.py +22 -10
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +41 -10
workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
workbench/model_script_utils/model_script_utils.py +339 -0
workbench/model_script_utils/pytorch_utils.py +405 -0
workbench/model_script_utils/uq_harness.py +278 -0
workbench/model_scripts/chemprop/chemprop.template +428 -631
workbench/model_scripts/chemprop/generated_model_script.py +432 -635
workbench/model_scripts/chemprop/model_script_utils.py +339 -0
workbench/model_scripts/chemprop/requirements.txt +2 -10
workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
workbench/model_scripts/meta_model/generated_model_script.py +209 -0
workbench/model_scripts/meta_model/meta_model.template +209 -0
workbench/model_scripts/pytorch_model/generated_model_script.py +374 -613
workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
workbench/model_scripts/pytorch_model/pytorch.template +370 -609
workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
workbench/model_scripts/pytorch_model/requirements.txt +1 -1
workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
workbench/model_scripts/script_generation.py +6 -5
workbench/model_scripts/uq_models/generated_model_script.py +65 -422
workbench/model_scripts/xgb_model/generated_model_script.py +372 -395
workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
workbench/model_scripts/xgb_model/uq_harness.py +278 -0
workbench/model_scripts/xgb_model/xgb_model.template +366 -396
workbench/repl/workbench_shell.py +0 -5
workbench/resources/open_source_api.key +1 -1
workbench/scripts/endpoint_test.py +2 -2
workbench/scripts/meta_model_sim.py +35 -0
workbench/scripts/training_test.py +85 -0
workbench/utils/chem_utils/fingerprints.py +87 -46
workbench/utils/chem_utils/projections.py +16 -6
workbench/utils/chemprop_utils.py +36 -655
workbench/utils/meta_model_simulator.py +499 -0
workbench/utils/metrics_utils.py +256 -0
workbench/utils/model_utils.py +192 -54
workbench/utils/pytorch_utils.py +33 -472
workbench/utils/shap_utils.py +1 -55
workbench/utils/xgboost_local_crossfold.py +267 -0
workbench/utils/xgboost_model_utils.py +49 -356
workbench/web_interface/components/model_plot.py +7 -1
workbench/web_interface/components/plugins/model_details.py +30 -68
workbench/web_interface/components/plugins/scatter_plot.py +4 -8
{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/METADATA +6 -5
{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/RECORD +76 -60
{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/entry_points.txt +2 -0
workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
workbench/model_scripts/uq_models/mapie.template +0 -605
workbench/model_scripts/uq_models/requirements.txt +0 -1
{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +0 -0
{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0

workbench/utils/xgboost_model_utils.py CHANGED Viewed

@@ -1,32 +1,23 @@
 """XGBoost Model Utilities"""
+import glob
+import hashlib
 import logging
 import os
-import tempfile
-import joblib
 import pickle
-import glob
+import tempfile
+from typing import Any, List, Optional, Tuple
 import awswrangler as wr
-from typing import Optional, List, Tuple, Any
-import hashlib
+import joblib
 import pandas as pd
-import numpy as np
 import xgboost as xgb
-from sklearn.model_selection import KFold, StratifiedKFold
-from sklearn.metrics import (
-    precision_recall_fscore_support,
-    mean_squared_error,
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-)
-from scipy.stats import spearmanr
-from sklearn.preprocessing import LabelEncoder
 # Workbench Imports
+from workbench.utils.aws_utils import pull_s3_data
+from workbench.utils.metrics_utils import compute_metrics_from_predictions
 from workbench.utils.model_utils import load_category_mappings_from_s3, safe_extract_tarfile
-from workbench.utils.pandas_utils import convert_categorical_types, expand_proba_column
+from workbench.utils.pandas_utils import convert_categorical_types
 # Set up the log
 log = logging.getLogger("workbench")
@@ -258,327 +249,45 @@ def leaf_stats(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
     return result_df
-def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """
-    Performs K-fold cross-validation with detailed metrics.
+def pull_cv_results(workbench_model: Any) -> Tuple[pd.DataFrame, pd.DataFrame]:
+    """Pull cross-validation results from AWS training artifacts.
+    This retrieves the validation predictions saved during model training and
+    computes metrics directly from them. For XGBoost models trained with
+    n_folds > 1, these are out-of-fold predictions from k-fold cross-validation.
     Args:
         workbench_model: Workbench model object
-        nfolds: Number of folds for cross-validation (default is 5)
     Returns:
         Tuple of:
-            - DataFrame with per-class metrics (and 'all' row for overall metrics)
-            - DataFrame with columns: id, target, prediction, and *_proba columns (for classifiers)
+            - DataFrame with computed metrics
+            - DataFrame with validation predictions
     """
-    from workbench.api import FeatureSet
-    # Load model
-    model_artifact_uri = workbench_model.model_data_url()
-    loaded_model = xgboost_model_from_s3(model_artifact_uri)
-    if loaded_model is None:
-        log.error("No XGBoost model found in the artifact.")
-        return pd.DataFrame(), pd.DataFrame()
-    # Check if we got a full sklearn model or need to create one
-    if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
-        is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
-        # Get the model's hyperparameters and ensure enable_categorical=True
-        params = loaded_model.get_params()
-        params["enable_categorical"] = True
-        # Create new model with same params but enable_categorical=True
-        if is_classifier:
-            xgb_model = xgb.XGBClassifier(**params)
-        else:
-            xgb_model = xgb.XGBRegressor(**params)
-    elif isinstance(loaded_model, xgb.Booster):
-        # Legacy: got a booster, need to wrap it
-        log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
-        is_classifier = workbench_model.model_type.value == "classifier"
-        xgb_model = (
-            xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
-        )
-        xgb_model._Booster = loaded_model
-    else:
-        log.error(f"Unexpected model type: {type(loaded_model)}")
-        return pd.DataFrame(), pd.DataFrame()
-    # Prepare data
-    fs = FeatureSet(workbench_model.get_input())
-    df = workbench_model.training_view().pull_dataframe()
-    # Extract sample weights if present
-    sample_weights = df.get("sample_weight")
-    if sample_weights is not None:
-        log.info(f"Using sample weights: min={sample_weights.min():.2f}, max={sample_weights.max():.2f}")
-    # Get columns
-    id_col = fs.id_column
-    target_col = workbench_model.target()
-    feature_cols = workbench_model.features()
-    print(f"Target column: {target_col}")
-    print(f"Feature columns: {len(feature_cols)} features")
-    # Convert string[python] to object, then to category for XGBoost compatibility
-    for col in feature_cols:
-        if pd.api.types.is_string_dtype(df[col]):
-            df[col] = df[col].astype("object").astype("category")
-    X = df[feature_cols]
-    y = df[target_col]
-    ids = df[id_col]
-    # Encode target if classifier
-    label_encoder = LabelEncoder() if is_classifier else None
-    if label_encoder:
-        y_encoded = label_encoder.fit_transform(y)
-        y_for_cv = pd.Series(y_encoded, index=y.index, name=target_col)
-    else:
-        y_for_cv = y
-    # Prepare KFold
-    kfold = (StratifiedKFold if is_classifier else KFold)(n_splits=nfolds, shuffle=True, random_state=42)
-    # Initialize results collection
-    fold_metrics = []
-    predictions_df = pd.DataFrame({id_col: ids, target_col: y})
-    # Perform cross-validation
-    for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y_for_cv), 1):
-        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-        y_train, y_val = y_for_cv.iloc[train_idx], y_for_cv.iloc[val_idx]
-        # Get sample weights for training fold
-        weights_train = sample_weights.iloc[train_idx] if sample_weights is not None else None
-        # Train and predict
-        xgb_model.fit(X_train, y_train, sample_weight=weights_train)
-        preds = xgb_model.predict(X_val)
-        # Store predictions (decode if classifier)
-        val_indices = X_val.index
-        if is_classifier:
-            predictions_df.loc[val_indices, "prediction"] = label_encoder.inverse_transform(preds.astype(int))
-            y_proba = xgb_model.predict_proba(X_val)
-            predictions_df.loc[val_indices, "pred_proba"] = pd.Series(y_proba.tolist(), index=val_indices)
-        else:
-            predictions_df.loc[val_indices, "prediction"] = preds
-        # Calculate fold metrics
-        if is_classifier:
-            y_val_orig = label_encoder.inverse_transform(y_val)
-            preds_orig = label_encoder.inverse_transform(preds.astype(int))
-            # Overall weighted metrics
-            prec, rec, f1, _ = precision_recall_fscore_support(
-                y_val_orig, preds_orig, average="weighted", zero_division=0
-            )
-            # Per-class F1
-            prec_per_class, rec_per_class, f1_per_class, _ = precision_recall_fscore_support(
-                y_val_orig, preds_orig, average=None, zero_division=0, labels=label_encoder.classes_
-            )
-            # ROC-AUC (overall and per-class)
-            roc_auc_overall = roc_auc_score(y_val, y_proba, multi_class="ovr", average="macro")
-            roc_auc_per_class = roc_auc_score(y_val, y_proba, multi_class="ovr", average=None)
-            fold_metrics.append(
-                {
-                    "fold": fold_idx,
-                    "precision": prec,
-                    "recall": rec,
-                    "f1": f1,
-                    "roc_auc": roc_auc_overall,
-                    "precision_per_class": prec_per_class,
-                    "recall_per_class": rec_per_class,
-                    "f1_per_class": f1_per_class,
-                    "roc_auc_per_class": roc_auc_per_class,
-                }
-            )
-        else:
-            spearman_corr, _ = spearmanr(y_val, preds)
-            fold_metrics.append(
-                {
-                    "fold": fold_idx,
-                    "rmse": np.sqrt(mean_squared_error(y_val, preds)),
-                    "mae": mean_absolute_error(y_val, preds),
-                    "medae": median_absolute_error(y_val, preds),
-                    "r2": r2_score(y_val, preds),
-                    "spearmanr": spearman_corr,
-                }
-            )
-    # Calculate summary metrics
-    fold_df = pd.DataFrame(fold_metrics)
-    if is_classifier:
-        # Expand the *_proba columns into separate columns for easier handling
-        predictions_df = expand_proba_column(predictions_df, label_encoder.classes_)
-        # Build per-class metrics DataFrame
-        metric_rows = []
-        # Per-class rows
-        for idx, class_name in enumerate(label_encoder.classes_):
-            prec_scores = np.array([fold["precision_per_class"][idx] for fold in fold_metrics])
-            rec_scores = np.array([fold["recall_per_class"][idx] for fold in fold_metrics])
-            f1_scores = np.array([fold["f1_per_class"][idx] for fold in fold_metrics])
-            roc_auc_scores = np.array([fold["roc_auc_per_class"][idx] for fold in fold_metrics])
-            y_orig = label_encoder.inverse_transform(y_for_cv)
-            support = int((y_orig == class_name).sum())
-            metric_rows.append(
-                {
-                    "class": class_name,
-                    "precision": prec_scores.mean(),
-                    "recall": rec_scores.mean(),
-                    "f1": f1_scores.mean(),
-                    "roc_auc": roc_auc_scores.mean(),
-                    "support": support,
-                }
-            )
-        # Overall 'all' row
-        metric_rows.append(
-            {
-                "class": "all",
-                "precision": fold_df["precision"].mean(),
-                "recall": fold_df["recall"].mean(),
-                "f1": fold_df["f1"].mean(),
-                "roc_auc": fold_df["roc_auc"].mean(),
-                "support": len(y_for_cv),
-            }
-        )
-        metrics_df = pd.DataFrame(metric_rows)
-    else:
-        # Regression metrics
-        metrics_df = pd.DataFrame(
-            [
-                {
-                    "rmse": fold_df["rmse"].mean(),
-                    "mae": fold_df["mae"].mean(),
-                    "medae": fold_df["medae"].mean(),
-                    "r2": fold_df["r2"].mean(),
-                    "spearmanr": fold_df["spearmanr"].mean(),
-                    "support": len(y_for_cv),
-                }
-            ]
-        )
+    # Get the validation predictions from S3
+    s3_path = f"{workbench_model.model_training_path}/validation_predictions.csv"
+    predictions_df = pull_s3_data(s3_path)
-    return metrics_df, predictions_df
+    if predictions_df is None:
+        raise ValueError(f"No validation predictions found at {s3_path}")
+    log.info(f"Pulled {len(predictions_df)} validation predictions from {s3_path}")
-def leave_one_out_inference(workbench_model: Any) -> pd.DataFrame:
-    """
-    Performs leave-one-out cross-validation (parallelized).
-    For datasets > 1000 rows, first identifies top 100 worst predictions via 10-fold CV,
-    then performs true leave-one-out on those 100 samples.
-    Each model trains on ALL data except one sample.
-    """
-    from workbench.api import FeatureSet
-    from joblib import Parallel, delayed
-    from tqdm import tqdm
+    # Compute metrics from predictions
+    target = workbench_model.target()
+    class_labels = workbench_model.class_labels()
-    def train_and_predict_one(model_params, is_classifier, X, y, train_idx, val_idx):
-        """Train on train_idx, predict on val_idx."""
-        model = xgb.XGBClassifier(**model_params) if is_classifier else xgb.XGBRegressor(**model_params)
-        model.fit(X[train_idx], y[train_idx])
-        return model.predict(X[val_idx])[0]
-    # Load model and get params
-    model_artifact_uri = workbench_model.model_data_url()
-    loaded_model = xgboost_model_from_s3(model_artifact_uri)
-    if loaded_model is None:
-        log.error("No XGBoost model found in the artifact.")
-        return pd.DataFrame()
-    if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
-        is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
-        model_params = loaded_model.get_params()
-    elif isinstance(loaded_model, xgb.Booster):
-        log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
-        is_classifier = workbench_model.model_type.value == "classifier"
-        model_params = {"enable_categorical": True}
-    else:
-        log.error(f"Unexpected model type: {type(loaded_model)}")
-        return pd.DataFrame()
-    # Load and prepare data
-    fs = FeatureSet(workbench_model.get_input())
-    df = workbench_model.training_view().pull_dataframe()
-    id_col = fs.id_column
-    target_col = workbench_model.target()
-    feature_cols = workbench_model.features()
-    # Convert string[python] to object, then to category for XGBoost compatibility
-    # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
-    for col in feature_cols:
-        if pd.api.types.is_string_dtype(df[col]):
-            # Double conversion: string[python] -> object -> category
-            df[col] = df[col].astype("object").astype("category")
-    # Determine which samples to run LOO on
-    if len(df) > 1000:
-        log.important(f"Dataset has {len(df)} rows. Running 10-fold CV to identify top 1000 worst predictions...")
-        _, predictions_df = cross_fold_inference(workbench_model, nfolds=10)
-        predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
-        worst_samples = predictions_df.nlargest(1000, "residual_abs")
-        worst_ids = worst_samples[id_col].values
-        loo_indices = df[df[id_col].isin(worst_ids)].index.values
-        log.important(f"Running leave-one-out CV on 1000 worst samples. Each model trains on {len(df)-1} rows...")
+    if target in predictions_df.columns and "prediction" in predictions_df.columns:
+        metrics_df = compute_metrics_from_predictions(predictions_df, target, class_labels)
     else:
-        log.important(f"Running leave-one-out CV on all {len(df)} samples...")
-        loo_indices = df.index.values
-    # Prepare full dataset for training
-    X_full = df[feature_cols].values
-    y_full = df[target_col].values
-    # Encode target if classifier
-    label_encoder = LabelEncoder() if is_classifier else None
-    if label_encoder:
-        y_full = label_encoder.fit_transform(y_full)
-    # Generate LOO splits
-    splits = []
-    for loo_idx in loo_indices:
-        train_idx = np.delete(np.arange(len(X_full)), loo_idx)
-        val_idx = np.array([loo_idx])
-        splits.append((train_idx, val_idx))
-    # Parallel execution
-    predictions = Parallel(n_jobs=4)(
-        delayed(train_and_predict_one)(model_params, is_classifier, X_full, y_full, train_idx, val_idx)
-        for train_idx, val_idx in tqdm(splits, desc="LOO CV")
-    )
-    # Build results dataframe
-    predictions_array = np.array(predictions)
-    if label_encoder:
-        predictions_array = label_encoder.inverse_transform(predictions_array.astype(int))
-    predictions_df = pd.DataFrame(
-        {
-            id_col: df.loc[loo_indices, id_col].values,
-            target_col: df.loc[loo_indices, target_col].values,
-            "prediction": predictions_array,
-        }
-    )
+        metrics_df = pd.DataFrame()
-    predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
-    return predictions_df
+    return metrics_df, predictions_df
 if __name__ == "__main__":
     """Exercise the Model Utilities"""
     from workbench.api import Model
-    from pprint import pprint
     # Test the XGBoost model loading and feature importance
     model = Model("abalone-regression")
@@ -594,38 +303,22 @@ if __name__ == "__main__":
     print(f"Model parameters: {xgb_model.get_params()}")
     print(f"enable_categorical: {xgb_model.enable_categorical}")
-    # Test with UQ Model
-    uq_model = Model("aqsol-uq")
-    _xgb_model = xgboost_model_from_s3(uq_model.model_data_url())
-    print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
+    print("\n=== PULL CV RESULTS EXAMPLE ===")
     model = Model("abalone-regression")
-    results, df = cross_fold_inference(model)
-    pprint(results)
-    print(df.head())
-    print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
+    metrics_df, predictions_df = pull_cv_results(model)
+    print(f"\nMetrics:\n{metrics_df}")
+    print(f"\nPredictions shape: {predictions_df.shape}")
+    print(f"Predictions columns: {predictions_df.columns.tolist()}")
+    print(predictions_df.head())
+    # Test on a Classifier model
+    print("\n=== CLASSIFIER MODEL TEST ===")
     model = Model("wine-classification")
-    results, df = cross_fold_inference(model)
-    pprint(results)
-    print(df.head())
-    # Test XGBoost add_leaf_hash
-    """
-    input_df = FeatureSet(model.get_input()).pull_dataframe()
-    leaf_df = add_leaf_hash(model, input_df)
-    print("DataFrame with Leaf Hash:")
-    print(leaf_df)
-    # Okay, we're going to copy row 3 and insert it into row 7 to make sure the leaf_hash is the same
-    input_df.iloc[7] = input_df.iloc[3]
-    print("DataFrame with Leaf Hash (3 and 7 should match):")
-    leaf_df = add_leaf_hash(model, input_df)
-    print(leaf_df)
-    # Test leaf_stats
-    target_col = "class_number_of_rings"
-    stats_df = leaf_stats(leaf_df, target_col)
-    print("DataFrame with Leaf Statistics:")
-    print(stats_df)
-    """
+    features = feature_importance(model)
+    print("Feature Importance:")
+    print(features)
+    metrics_df, predictions_df = pull_cv_results(model)
+    print(f"\nMetrics:\n{metrics_df}")
+    print(f"\nPredictions shape: {predictions_df.shape}")
+    print(f"Predictions columns: {predictions_df.columns.tolist()}")
+    print(predictions_df.head())

workbench/web_interface/components/model_plot.py CHANGED Viewed

@@ -36,8 +36,14 @@ class ModelPlot(ComponentInterface):
             if df is None:
                 return self.display_text("No Data")
-            # Calculate the distance from the diagonal for each point
+            # Grab the target(s) for this model
             target = model.target()
+            # For multi-task models, match target to inference_run name or default to first
+            if isinstance(target, list):
+                target = next((t for t in target if t in inference_run), target[0])
+            # Compute error for coloring
             df["error"] = abs(df["prediction"] - df[target])
             return ScatterPlot().update_properties(
                 df,

workbench/web_interface/components/plugins/model_details.py CHANGED Viewed

@@ -41,7 +41,7 @@ class ModelDetails(PluginInterface):
             id=self.component_id,
             children=[
                 html.H4(id=f"{self.component_id}-header", children="Model: Loading..."),
-                dcc.Markdown(id=f"{self.component_id}-summary"),
+                dcc.Markdown(id=f"{self.component_id}-summary", dangerously_allow_html=True),
                 html.H5(children="Inference Metrics", style={"marginTop": "20px"}),
                 dcc.Dropdown(id=f"{self.component_id}-dropdown", className="dropdown"),
                 dcc.Markdown(id=f"{self.component_id}-metrics"),
@@ -106,63 +106,37 @@ class ModelDetails(PluginInterface):
         Returns:
             str: A markdown string
         """
-        # Get these fields from the model
-        show_fields = [
-            "health_tags",
-            "input",
-            "workbench_registered_endpoints",
-            "workbench_model_type",
-            "workbench_model_target",
-            "workbench_model_features",
-            "param_meta",
-            "workbench_tags",
-        ]
-        # Construct the markdown string
         summary = self.current_model.summary()
         markdown = ""
-        for key in show_fields:
-            # Special case for the health tags
-            if key == "health_tags":
-                markdown += health_tag_markdown(summary.get(key, []))
-                continue
-            # Special case for the features
-            if key == "workbench_model_features":
-                value = summary.get(key, [])
-                key = "features"
-                value = f"({len(value)}) {', '.join(value)[:100]}..."
-                markdown += f"**{key}:** {value}  \n"
-                continue
-            # Special case for Parameter Store Metadata
-            if key == "param_meta":
-                model_name = summary["name"]
-                meta_data = self.params.get(f"/workbench/models/{model_name}/meta", warn=False)
-                if meta_data:
-                    markdown += dict_to_markdown(meta_data, title="Additional Metadata")
-                continue
-            # Special case for tags
-            if key == "workbench_tags":
-                tags = summary.get(key, "")
-                markdown += tags_to_markdown(tags)
-                continue
-            # Get the value
-            value = summary.get(key, "-")
-            # If the value is a list, convert it to a comma-separated string
-            if isinstance(value, list):
-                value = ", ".join(value)
-            # Chop off the "workbench_" prefix
-            key = key.replace("workbench_", "")
-            # Add to markdown string
-            markdown += f"**{key}:** {value}  \n"
+        # Health tags
+        markdown += health_tag_markdown(summary.get("health_tags", []))
+        # Simple fields
+        markdown += f"**input:** {summary.get('input', '-')}  \n"
+        endpoints = ", ".join(summary.get("workbench_registered_endpoints", []))
+        markdown += f"**registered_endpoints:** {endpoints or '-'}  \n"
+        markdown += f"**model_type:** {summary.get('workbench_model_type', '-')}  \n"
+        markdown += f"**model_target:** {summary.get('workbench_model_target', '-')}  \n"
+        # Features (truncated)
+        features = summary.get("workbench_model_features", [])
+        features_str = f"({len(features)}) {', '.join(features)[:100]}..."
+        markdown += f"**features:** {features_str}  \n"
+        # Parameter Store metadata
+        model_name = summary["name"]
+        meta_data = self.params.get(f"/workbench/models/{model_name}/meta", warn=False)
+        if meta_data:
+            markdown += dict_to_markdown(meta_data, title="Additional Metadata")
+        # Tags
+        markdown += tags_to_markdown(summary.get("workbench_tags", "")) + "  \n"
+        # Hyperparameters
+        hyperparams = summary.get("hyperparameters")
+        if hyperparams and isinstance(hyperparams, dict):
+            markdown += dict_to_collapsible_html(hyperparams, title="Hyperparameters", collapse_all=True)
         return markdown
@@ -219,18 +193,6 @@ class ModelDetails(PluginInterface):
             markdown += dict_to_markdown(inference_data, title="Additional Inference Metrics")
         return markdown
-    def cross_metrics(self) -> str:
-        # Get cross fold metrics if they exist
-        # Note: Currently not used since we show cross fold metrics in the dropdown
-        model_name = self.current_model.name
-        cross_fold_data = self.params.get(f"/workbench/models/{model_name}/inference/cross_fold", warn=False)
-        if not cross_fold_data:
-            return "**No Cross Fold Data**"
-        # Convert the cross fold data to a markdown string
-        html = dict_to_collapsible_html(cross_fold_data)
-        return html
     def get_inference_runs(self):
         """Get the inference runs for the model

workbench/web_interface/components/plugins/scatter_plot.py CHANGED Viewed

@@ -420,21 +420,17 @@ if __name__ == "__main__":
     df = pd.DataFrame(data)
     # Get a UQ regressor model
-    # from workbench.api import Endpoint, DFStore
-    # end = Endpoint("aqsol-uq")
-    # df = end.auto_inference()
-    # DFStore().upsert("/workbench/models/aqsol-uq/auto_inference", df)
+    from workbench.api import Model
-    from workbench.api import DFStore
-    df = DFStore().get("/workbench/models/aqsol-uq-100/full_cross_fold_inference")
+    model = Model("logd-reg-xgb")
+    df = model.get_inference_predictions("full_cross_fold")
     # Run the Unit Test on the Plugin
     PluginUnitTest(
         ScatterPlot,
         input_data=df,
         theme="midnight_blue",
-        x="solubility",
+        x="logd",
         y="prediction",
         color="prediction_std",
         suppress_hover_display=True,

{workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.202
+Version: 0.8.220
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License: MIT License
@@ -47,15 +47,16 @@ Requires-Dist: cryptography>=44.0.2
 Requires-Dist: ipython>=8.37.0
 Requires-Dist: pyreadline3; sys_platform == "win32"
 Requires-Dist: scikit-learn>=1.5.2
+Requires-Dist: umap-learn>=0.5.8
 Requires-Dist: xgboost>=3.0.3
 Requires-Dist: joblib>=1.3.2
 Requires-Dist: requests>=2.26.0
 Requires-Dist: rdkit>=2024.9.5
 Requires-Dist: mordredcommunity>=2.0.6
-Requires-Dist: workbench-bridges>=0.1.10
+Requires-Dist: workbench-bridges>=0.1.16
 Provides-Extra: ui
 Requires-Dist: plotly>=6.0.0; extra == "ui"
-Requires-Dist: dash>3.0.0; extra == "ui"
+Requires-Dist: dash>=3.0.0; extra == "ui"
 Requires-Dist: dash-bootstrap-components>=1.6.0; extra == "ui"
 Requires-Dist: dash-bootstrap-templates>=1.3.0; extra == "ui"
 Requires-Dist: dash_ag_grid; extra == "ui"
@@ -70,8 +71,8 @@ Requires-Dist: flake8; extra == "dev"
 Requires-Dist: black; extra == "dev"
 Provides-Extra: all
 Requires-Dist: networkx>=3.2; extra == "all"
-Requires-Dist: plotly>=5.18.0; extra == "all"
-Requires-Dist: dash<3.0.0,>=2.16.1; extra == "all"
+Requires-Dist: plotly>=6.0.0; extra == "all"
+Requires-Dist: dash>=3.0.0; extra == "all"
 Requires-Dist: dash-bootstrap-components>=1.6.0; extra == "all"
 Requires-Dist: dash-bootstrap-templates>=1.3.0; extra == "all"
 Requires-Dist: dash_ag_grid; extra == "all"

workbench 0.8.202__py3-none-any.whl → 0.8.220__py3-none-any.whl

Potentially problematic release.

workbench 0.8.202__py3-none-any.whl → 0.8.220__py3-none-any.whl