workbench 0.8.168__py3-none-any.whl → 0.8.193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. workbench/algorithms/dataframe/proximity.py +143 -102
  2. workbench/algorithms/graph/light/proximity_graph.py +2 -1
  3. workbench/api/compound.py +1 -1
  4. workbench/api/endpoint.py +3 -2
  5. workbench/api/feature_set.py +4 -4
  6. workbench/api/model.py +16 -12
  7. workbench/api/monitor.py +1 -16
  8. workbench/core/artifacts/artifact.py +11 -3
  9. workbench/core/artifacts/data_capture_core.py +355 -0
  10. workbench/core/artifacts/endpoint_core.py +113 -27
  11. workbench/core/artifacts/feature_set_core.py +72 -13
  12. workbench/core/artifacts/model_core.py +71 -49
  13. workbench/core/artifacts/monitor_core.py +33 -249
  14. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  15. workbench/core/cloud_platform/aws/aws_meta.py +11 -4
  16. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  17. workbench/core/transforms/features_to_model/features_to_model.py +11 -6
  18. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  19. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  20. workbench/core/views/training_view.py +49 -53
  21. workbench/core/views/view.py +51 -1
  22. workbench/core/views/view_utils.py +4 -4
  23. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  24. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  25. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  26. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  27. workbench/model_scripts/custom_models/proximity/proximity.py +143 -102
  28. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  29. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +10 -17
  30. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  31. workbench/model_scripts/custom_models/uq_models/meta_uq.template +156 -58
  32. workbench/model_scripts/custom_models/uq_models/ngboost.template +20 -14
  33. workbench/model_scripts/custom_models/uq_models/proximity.py +143 -102
  34. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  35. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +5 -13
  36. workbench/model_scripts/pytorch_model/pytorch.template +9 -18
  37. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  38. workbench/model_scripts/script_generation.py +7 -2
  39. workbench/model_scripts/uq_models/mapie.template +492 -0
  40. workbench/model_scripts/uq_models/requirements.txt +1 -0
  41. workbench/model_scripts/xgb_model/generated_model_script.py +34 -43
  42. workbench/model_scripts/xgb_model/xgb_model.template +31 -40
  43. workbench/repl/workbench_shell.py +4 -4
  44. workbench/scripts/lambda_launcher.py +63 -0
  45. workbench/scripts/{ml_pipeline_launcher.py → ml_pipeline_batch.py} +49 -51
  46. workbench/scripts/ml_pipeline_sqs.py +186 -0
  47. workbench/utils/chem_utils/__init__.py +0 -0
  48. workbench/utils/chem_utils/fingerprints.py +134 -0
  49. workbench/utils/chem_utils/misc.py +194 -0
  50. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  51. workbench/utils/chem_utils/mol_standardize.py +450 -0
  52. workbench/utils/chem_utils/mol_tagging.py +348 -0
  53. workbench/utils/chem_utils/projections.py +209 -0
  54. workbench/utils/chem_utils/salts.py +256 -0
  55. workbench/utils/chem_utils/sdf.py +292 -0
  56. workbench/utils/chem_utils/toxicity.py +250 -0
  57. workbench/utils/chem_utils/vis.py +253 -0
  58. workbench/utils/config_manager.py +2 -6
  59. workbench/utils/endpoint_utils.py +5 -7
  60. workbench/utils/license_manager.py +2 -6
  61. workbench/utils/model_utils.py +89 -31
  62. workbench/utils/monitor_utils.py +44 -62
  63. workbench/utils/pandas_utils.py +3 -3
  64. workbench/utils/shap_utils.py +10 -2
  65. workbench/utils/workbench_sqs.py +1 -1
  66. workbench/utils/xgboost_model_utils.py +300 -151
  67. workbench/web_interface/components/model_plot.py +7 -1
  68. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  69. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  70. workbench/web_interface/components/plugins/model_details.py +7 -2
  71. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  72. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/METADATA +24 -2
  73. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/RECORD +77 -72
  74. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/entry_points.txt +3 -1
  75. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/licenses/LICENSE +1 -1
  76. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  77. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  78. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  79. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  80. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  81. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  82. workbench/model_scripts/pytorch_model/generated_model_script.py +0 -576
  83. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  84. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  85. workbench/model_scripts/scikit_learn/generated_model_script.py +0 -307
  86. workbench/utils/chem_utils.py +0 -1556
  87. workbench/utils/fast_inference.py +0 -167
  88. workbench/utils/resource_utils.py +0 -39
  89. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/WHEEL +0 -0
  90. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@
  import logging
  import os
  import tempfile
- import tarfile
+ import joblib
  import pickle
  import glob
  import awswrangler as wr
@@ -16,15 +16,16 @@ from typing import Dict, Any
  from sklearn.model_selection import KFold, StratifiedKFold
  from sklearn.metrics import (
      precision_recall_fscore_support,
-     confusion_matrix,
      mean_squared_error,
      mean_absolute_error,
      r2_score,
+     median_absolute_error,
  )
+ from scipy.stats import spearmanr
  from sklearn.preprocessing import LabelEncoder

  # Workbench Imports
- from workbench.utils.model_utils import load_category_mappings_from_s3
+ from workbench.utils.model_utils import load_category_mappings_from_s3, safe_extract_tarfile
  from workbench.utils.pandas_utils import convert_categorical_types

  # Set up the log
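
For reference, the two regression metrics newly imported here (median absolute error and Spearman rank correlation) behave as in this small standalone sketch; the arrays are toy data, purely illustrative:

    import numpy as np
    from scipy.stats import spearmanr
    from sklearn.metrics import median_absolute_error

    y_true = np.array([1.0, 2.0, 3.0, 4.0, 10.0])
    y_pred = np.array([1.1, 1.9, 3.2, 4.5, 6.0])

    # MedAE is robust to the single large miss on the last sample
    print(median_absolute_error(y_true, y_pred))  # 0.2
    # spearmanr returns (correlation, p-value); the correlation is rank-based
    corr, _ = spearmanr(y_true, y_pred)
    print(corr)  # 1.0, since the predictions preserve the rank order
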
@@ -34,14 +35,12 @@ log = logging.getLogger("workbench")
  def xgboost_model_from_s3(model_artifact_uri: str):
      """
      Download and extract XGBoost model artifact from S3, then load the model into memory.
-     Handles both direct XGBoost model files and pickled models.
-     Ensures categorical feature support is enabled.

      Args:
          model_artifact_uri (str): S3 URI of the model artifact.

      Returns:
-         Loaded XGBoost model or None if unavailable.
+         Loaded XGBoost model (XGBClassifier, XGBRegressor, or Booster) or None if unavailable.
      """

      with tempfile.TemporaryDirectory() as tmpdir:
@@ -50,68 +49,90 @@ def xgboost_model_from_s3(model_artifact_uri: str):
          wr.s3.download(path=model_artifact_uri, local_file=local_tar_path)

          # Extract tarball
-         with tarfile.open(local_tar_path, "r:gz") as tar:
-             tar.extractall(path=tmpdir, filter="data")
+         safe_extract_tarfile(local_tar_path, tmpdir)

          # Define model file patterns to search for (in order of preference)
          patterns = [
-             # Direct XGBoost model files
-             os.path.join(tmpdir, "xgboost-model"),
-             os.path.join(tmpdir, "model"),
-             os.path.join(tmpdir, "*.bin"),
+             # Joblib models (preferred - preserves everything)
+             os.path.join(tmpdir, "*model*.joblib"),
+             os.path.join(tmpdir, "xgb*.joblib"),
+             os.path.join(tmpdir, "**", "*model*.joblib"),
+             os.path.join(tmpdir, "**", "xgb*.joblib"),
+             # Pickle models (also preserves everything)
+             os.path.join(tmpdir, "*model*.pkl"),
+             os.path.join(tmpdir, "xgb*.pkl"),
+             os.path.join(tmpdir, "**", "*model*.pkl"),
+             os.path.join(tmpdir, "**", "xgb*.pkl"),
+             # JSON models (fallback - requires reconstruction)
+             os.path.join(tmpdir, "*model*.json"),
+             os.path.join(tmpdir, "xgb*.json"),
              os.path.join(tmpdir, "**", "*model*.json"),
-             os.path.join(tmpdir, "**", "rmse.json"),
-             # Pickled models
-             os.path.join(tmpdir, "*.pkl"),
-             os.path.join(tmpdir, "**", "*.pkl"),
-             os.path.join(tmpdir, "*.pickle"),
-             os.path.join(tmpdir, "**", "*.pickle"),
+             os.path.join(tmpdir, "**", "xgb*.json"),
          ]

          # Try each pattern
          for pattern in patterns:
-             # Use glob to find all matching files
              for model_path in glob.glob(pattern, recursive=True):
-                 # Determine file type by extension
+                 # Skip files that are clearly not XGBoost models
+                 filename = os.path.basename(model_path).lower()
+                 if any(skip in filename for skip in ["label_encoder", "scaler", "preprocessor", "transformer"]):
+                     log.debug(f"Skipping non-model file: {model_path}")
+                     continue
+
                  _, ext = os.path.splitext(model_path)

                  try:
-                     if ext.lower() in [".pkl", ".pickle"]:
-                         # Handle pickled models
+                     if ext == ".joblib":
+                         model = joblib.load(model_path)
+                         # Verify it's actually an XGBoost model
+                         if isinstance(model, (xgb.XGBClassifier, xgb.XGBRegressor, xgb.Booster)):
+                             log.important(f"Loaded XGBoost model from joblib: {model_path}")
+                             return model
+                         else:
+                             log.debug(f"Skipping non-XGBoost object from {model_path}: {type(model)}")
+
+                     elif ext in [".pkl", ".pickle"]:
                          with open(model_path, "rb") as f:
                              model = pickle.load(f)
-
-                         # Handle different model types
-                         if isinstance(model, xgb.Booster):
-                             log.important(f"Loaded XGBoost Booster from pickle: {model_path}")
+                         # Verify it's actually an XGBoost model
+                         if isinstance(model, (xgb.XGBClassifier, xgb.XGBRegressor, xgb.Booster)):
+                             log.important(f"Loaded XGBoost model from pickle: {model_path}")
                              return model
-                         elif hasattr(model, "get_booster"):
-                             log.important(f"Loaded XGBoost model from pipeline: {model_path}")
-                             booster = model.get_booster()
-                             return booster
-                     else:
-                         # Handle direct XGBoost model files
+                         else:
+                             log.debug(f"Skipping non-XGBoost object from {model_path}: {type(model)}")
+
+                     elif ext == ".json":
+                         # JSON files should be XGBoost models by definition
                          booster = xgb.Booster()
                          booster.load_model(model_path)
-                         log.important(f"Loaded XGBoost model directly: {model_path}")
+                         log.important(f"Loaded XGBoost booster from JSON: {model_path}")
                          return booster
+
                  except Exception as e:
-                     log.info(f"Failed to load model from {model_path}: {e}")
-                     continue  # Try the next file
+                     log.debug(f"Failed to load {model_path}: {e}")
+                     continue

-     # If no model found
      log.error("No XGBoost model found in the artifact.")
      return None


- def feature_importance(workbench_model, importance_type: str = "weight") -> Optional[List[Tuple[str, float]]]:
+ def feature_importance(workbench_model, importance_type: str = "gain") -> Optional[List[Tuple[str, float]]]:
      """
      Get sorted feature importances from a Workbench Model object.

      Args:
          workbench_model: Workbench model object
-         importance_type: Type of feature importance.
-             Options: 'weight', 'gain', 'cover', 'total_gain', 'total_cover'
+         importance_type: Type of feature importance. Options:
+             - 'gain' (default): Average improvement in loss/objective when feature is used.
+               Best for understanding predictive power of features.
+             - 'weight': Number of times a feature appears in trees (split count).
+               Useful for understanding model complexity and feature usage frequency.
+             - 'cover': Average number of samples affected when feature is used.
+               Shows the relative quantity of observations related to this feature.
+             - 'total_gain': Total improvement in loss/objective across all splits.
+               Similar to 'gain' but not averaged (can be biased toward frequent features).
+             - 'total_cover': Total number of samples affected across all splits.
+               Similar to 'cover' but not averaged.

      Returns:
          List of tuples (feature, importance) sorted by importance value (descending).
@@ -120,7 +141,8 @@ def feature_importance(workbench_model, importance_type: str = "weight") -> Opti

      Note:
          XGBoost's get_score() only returns features with non-zero importance.
-         This function ensures all model features are included in the output.
+         This function ensures all model features are included in the output,
+         adding zero values for features that weren't used in any tree splits.
      """
      model_artifact_uri = workbench_model.model_data_url()
      xgb_model = xgboost_model_from_s3(model_artifact_uri)
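
A minimal usage sketch (not part of the package) for the reworked loader and the new "gain" default; it assumes the "abalone-regression" model used in the __main__ block further down is deployed in the caller's Workbench account:

    from workbench.api import Model
    from workbench.utils.xgboost_model_utils import feature_importance, xgboost_model_from_s3

    model = Model("abalone-regression")  # model name taken from the __main__ examples below
    xgb_model = xgboost_model_from_s3(model.model_data_url())
    # May be an XGBClassifier/XGBRegressor (joblib or pickle artifact) or a bare Booster (legacy JSON)
    print(type(xgb_model))

    # Now defaults to importance_type="gain"; returns None if no XGBoost artifact is found
    importances = feature_importance(model)
    if importances:
        for feat, score in importances[:10]:
            print(f"{feat}: {score:.4f}")
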
@@ -128,11 +150,18 @@ def feature_importance(workbench_model, importance_type: str = "weight") -> Opti
          log.error("No XGBoost model found in the artifact.")
          return None

-     # Get feature importances (only non-zero features)
-     importances = xgb_model.get_score(importance_type=importance_type)
+     # Check if we got a full sklearn model or just a booster (for backwards compatibility)
+     if hasattr(xgb_model, "get_booster"):
+         # Full sklearn model - get the booster for feature importance
+         booster = xgb_model.get_booster()
+         all_features = booster.feature_names
+     else:
+         # Already a booster (legacy JSON load)
+         booster = xgb_model
+         all_features = xgb_model.feature_names

-     # Get all feature names from the model
-     all_features = xgb_model.feature_names
+     # Get feature importances (only non-zero features)
+     importances = booster.get_score(importance_type=importance_type)

      # Create complete importance dict with zeros for missing features
      complete_importances = {feat: importances.get(feat, 0.0) for feat in all_features}
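
A standalone sketch (synthetic data, illustrative only) of why the zero-fill above matters: Booster.get_score() silently omits features that never appear in a split.

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.random((200, 5))
    y = 3.0 * X[:, 0]  # only the first feature carries signal
    dtrain = xgb.DMatrix(X, label=y, feature_names=[f"f{i}" for i in range(5)])
    booster = xgb.train({"max_depth": 2}, dtrain, num_boost_round=10)

    scores = booster.get_score(importance_type="gain")  # typically only contains "f0"
    complete = {f: scores.get(f, 0.0) for f in booster.feature_names}  # zero-filled, as above
    print(complete)
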
@@ -229,148 +258,260 @@ def leaf_stats(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
      return result_df


- def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Dict[str, Any]:
+ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[str, Any], pd.DataFrame]:
      """
      Performs K-fold cross-validation with detailed metrics.
      Args:
          workbench_model: Workbench model object
          nfolds: Number of folds for cross-validation (default is 5)
      Returns:
-         Dictionary containing:
-             - folds: Dictionary of formatted strings for each fold
-             - summary_metrics: Summary metrics across folds
-             - overall_metrics: Overall metrics for all folds
+         Tuple of:
+             - Dictionary containing:
+                 - folds: Dictionary of formatted strings for each fold
+                 - summary_metrics: Summary metrics across folds
+             - DataFrame with columns: id, target, prediction (out-of-fold predictions for all samples)
      """
      from workbench.api import FeatureSet

      # Load model
-     model_type = workbench_model.model_type.value
      model_artifact_uri = workbench_model.model_data_url()
-     loaded_booster = xgboost_model_from_s3(model_artifact_uri)
-     if loaded_booster is None:
+     loaded_model = xgboost_model_from_s3(model_artifact_uri)
+     if loaded_model is None:
          log.error("No XGBoost model found in the artifact.")
-         return {}
-     # Create the model wrapper
-     is_classifier = model_type == "classifier"
-     xgb_model = (
-         xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
-     )
-     xgb_model._Booster = loaded_booster
+         return {}, pd.DataFrame()
+
+     # Check if we got a full sklearn model or need to create one
+     if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
+         is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
+
+         # Get the model's hyperparameters and ensure enable_categorical=True
+         params = loaded_model.get_params()
+         params["enable_categorical"] = True
+
+         # Create new model with same params but enable_categorical=True
+         if is_classifier:
+             xgb_model = xgb.XGBClassifier(**params)
+         else:
+             xgb_model = xgb.XGBRegressor(**params)
+
+     elif isinstance(loaded_model, xgb.Booster):
+         # Legacy: got a booster, need to wrap it
+         log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
+         is_classifier = workbench_model.model_type.value == "classifier"
+         xgb_model = (
+             xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
+         )
+         xgb_model._Booster = loaded_model
+     else:
+         log.error(f"Unexpected model type: {type(loaded_model)}")
+         return {}, pd.DataFrame()
+
      # Prepare data
      fs = FeatureSet(workbench_model.get_input())
-     df = fs.pull_dataframe()
+     df = workbench_model.training_view().pull_dataframe()
+
+     # Get id column - assuming FeatureSet has an id_column attribute or similar
+     id_col = fs.id_column
+     target_col = workbench_model.target()
      feature_cols = workbench_model.features()
-     # Convert string features to categorical
+
+     # Convert string[python] to object, then to category for XGBoost compatibility
+     # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
      for col in feature_cols:
-         if df[col].dtype in ["object", "string"]:
-             df[col] = df[col].astype("category")
-     # Split X and y
-     X = df[workbench_model.features()]
-     y = df[workbench_model.target()]
+         if pd.api.types.is_string_dtype(df[col]):
+             # Double conversion: string[python] -> object -> category
+             df[col] = df[col].astype("object").astype("category")

-     # Encode target if it's a classification problem
+     X = df[feature_cols]
+     y = df[target_col]
+     ids = df[id_col]
+
+     # Encode target if classifier
      label_encoder = LabelEncoder() if is_classifier else None
      if label_encoder:
-         y = pd.Series(label_encoder.fit_transform(y), name=workbench_model.target())
+         y_encoded = label_encoder.fit_transform(y)
+         y_for_cv = pd.Series(y_encoded, index=y.index, name=target_col)
+     else:
+         y_for_cv = y
+
      # Prepare KFold
-     kfold = (
-         StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=42)
-         if is_classifier
-         else KFold(n_splits=nfolds, shuffle=True, random_state=42)
-     )
+     kfold = (StratifiedKFold if is_classifier else KFold)(n_splits=nfolds, shuffle=True, random_state=42)
+
+     # Initialize results collection
+     fold_metrics = []
+     predictions_df = pd.DataFrame({id_col: ids, target_col: y})  # Keep original values
+     # Note: 'prediction' column will be created automatically with correct dtype

-     fold_results = []
-     all_predictions = []
-     all_actuals = []
-     for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y)):
+     # Perform cross-validation
+     for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y_for_cv), 1):
          X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-         y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
+         y_train, y_val = y_for_cv.iloc[train_idx], y_for_cv.iloc[val_idx]

-         # Train the model
+         # Train and predict
          xgb_model.fit(X_train, y_train)
          preds = xgb_model.predict(X_val)
-         all_predictions.extend(preds)
-         all_actuals.extend(y_val)

-         # Calculate metrics for this fold
-         fold_metrics = {"fold": fold_idx + 1}
+         # Store predictions (decode if classifier)
+         val_indices = X_val.index
+         if is_classifier:
+             predictions_df.loc[val_indices, "prediction"] = label_encoder.inverse_transform(preds.astype(int))
+         else:
+             predictions_df.loc[val_indices, "prediction"] = preds

+         # Calculate fold metrics
          if is_classifier:
-             y_val_original = label_encoder.inverse_transform(y_val)
-             preds_original = label_encoder.inverse_transform(preds.astype(int))
-             scores = precision_recall_fscore_support(
-                 y_val_original, preds_original, average="weighted", zero_division=0
+             y_val_orig = label_encoder.inverse_transform(y_val)
+             preds_orig = label_encoder.inverse_transform(preds.astype(int))
+             prec, rec, f1, _ = precision_recall_fscore_support(
+                 y_val_orig, preds_orig, average="weighted", zero_division=0
              )
-             fold_metrics.update({"precision": float(scores[0]), "recall": float(scores[1]), "fscore": float(scores[2])})
+             fold_metrics.append({"fold": fold_idx, "precision": prec, "recall": rec, "fscore": f1})
          else:
-             fold_metrics.update(
+             spearman_corr, _ = spearmanr(y_val, preds)
+             fold_metrics.append(
                  {
-                     "rmse": float(np.sqrt(mean_squared_error(y_val, preds))),
-                     "mae": float(mean_absolute_error(y_val, preds)),
-                     "r2": float(r2_score(y_val, preds)),
+                     "fold": fold_idx,
+                     "rmse": np.sqrt(mean_squared_error(y_val, preds)),
+                     "mae": mean_absolute_error(y_val, preds),
+                     "medae": median_absolute_error(y_val, preds),
+                     "r2": r2_score(y_val, preds),
+                     "spearmanr": spearman_corr,
                  }
              )

-         fold_results.append(fold_metrics)
-     # Calculate overall metrics
-     overall_metrics = {}
-     if is_classifier:
-         all_actuals_original = label_encoder.inverse_transform(all_actuals)
-         all_predictions_original = label_encoder.inverse_transform(all_predictions)
-         scores = precision_recall_fscore_support(
-             all_actuals_original, all_predictions_original, average="weighted", zero_division=0
-         )
-         overall_metrics.update(
-             {
-                 "precision": float(scores[0]),
-                 "recall": float(scores[1]),
-                 "fscore": float(scores[2]),
-                 "confusion_matrix": confusion_matrix(
-                     all_actuals_original, all_predictions_original, labels=label_encoder.classes_
-                 ).tolist(),
-                 "label_names": list(label_encoder.classes_),
-             }
-         )
-     else:
-         overall_metrics.update(
-             {
-                 "rmse": float(np.sqrt(mean_squared_error(all_actuals, all_predictions))),
-                 "mae": float(mean_absolute_error(all_actuals, all_predictions)),
-                 "r2": float(r2_score(all_actuals, all_predictions)),
-             }
-         )
-     # Calculate summary metrics across folds
-     summary_metrics = {}
-     metrics_to_aggregate = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "r2"]
-
-     for metric in metrics_to_aggregate:
-         values = [fold[metric] for fold in fold_results]
-         summary_metrics[metric] = f"{float(np.mean(values)):.3f} ±{float(np.std(values)):.3f}"
-     # Format fold results as strings (TBD section)
+     # Calculate summary metrics (mean ± std)
+     fold_df = pd.DataFrame(fold_metrics)
+     metric_names = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "medae", "r2", "spearmanr"]
+     summary_metrics = {metric: f"{fold_df[metric].mean():.3f} ±{fold_df[metric].std():.3f}" for metric in metric_names}
+
+     # Format fold results for display
      formatted_folds = {}
-     for fold_data in fold_results:
-         fold_key = f"Fold {fold_data['fold']}"
+     for _, row in fold_df.iterrows():
+         fold_key = f"Fold {int(row['fold'])}"
          if is_classifier:
              formatted_folds[fold_key] = (
-                 f"precision: {fold_data['precision']:.3f} "
-                 f"recall: {fold_data['recall']:.3f} "
-                 f"fscore: {fold_data['fscore']:.3f}"
+                 f"precision: {row['precision']:.3f} " f"recall: {row['recall']:.3f} " f"fscore: {row['fscore']:.3f}"
              )
          else:
              formatted_folds[fold_key] = (
-                 f"rmse: {fold_data['rmse']:.3f} mae: {fold_data['mae']:.3f} r2: {fold_data['r2']:.3f}"
+                 f"rmse: {row['rmse']:.3f} "
+                 f"mae: {row['mae']:.3f} "
+                 f"medae: {row['medae']:.3f} "
+                 f"r2: {row['r2']:.3f} "
+                 f"spearmanr: {row['spearmanr']:.3f}"
              )
-     # Return the results
-     return {
-         "summary_metrics": summary_metrics,
-         # "overall_metrics": overall_metrics,
-         "folds": formatted_folds,
-     }
+
+     # Build return dictionary
+     metrics_dict = {"summary_metrics": summary_metrics, "folds": formatted_folds}
+
+     return metrics_dict, predictions_df
+
+
+ def leave_one_out_inference(workbench_model: Any) -> pd.DataFrame:
+     """
+     Performs leave-one-out cross-validation (parallelized).
+     For datasets > 1000 rows, first identifies top 100 worst predictions via 10-fold CV,
+     then performs true leave-one-out on those 100 samples.
+     Each model trains on ALL data except one sample.
+     """
+     from workbench.api import FeatureSet
+     from joblib import Parallel, delayed
+     from tqdm import tqdm
+
+     def train_and_predict_one(model_params, is_classifier, X, y, train_idx, val_idx):
+         """Train on train_idx, predict on val_idx."""
+         model = xgb.XGBClassifier(**model_params) if is_classifier else xgb.XGBRegressor(**model_params)
+         model.fit(X[train_idx], y[train_idx])
+         return model.predict(X[val_idx])[0]
+
+     # Load model and get params
+     model_artifact_uri = workbench_model.model_data_url()
+     loaded_model = xgboost_model_from_s3(model_artifact_uri)
+     if loaded_model is None:
+         log.error("No XGBoost model found in the artifact.")
+         return pd.DataFrame()
+
+     if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
+         is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
+         model_params = loaded_model.get_params()
+     elif isinstance(loaded_model, xgb.Booster):
+         log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
+         is_classifier = workbench_model.model_type.value == "classifier"
+         model_params = {"enable_categorical": True}
+     else:
+         log.error(f"Unexpected model type: {type(loaded_model)}")
+         return pd.DataFrame()
+
+     # Load and prepare data
+     fs = FeatureSet(workbench_model.get_input())
+     df = workbench_model.training_view().pull_dataframe()
+     id_col = fs.id_column
+     target_col = workbench_model.target()
+     feature_cols = workbench_model.features()
+
+     # Convert string[python] to object, then to category for XGBoost compatibility
+     # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
+     for col in feature_cols:
+         if pd.api.types.is_string_dtype(df[col]):
+             # Double conversion: string[python] -> object -> category
+             df[col] = df[col].astype("object").astype("category")
+
+     # Determine which samples to run LOO on
+     if len(df) > 1000:
+         log.important(f"Dataset has {len(df)} rows. Running 10-fold CV to identify top 1000 worst predictions...")
+         _, predictions_df = cross_fold_inference(workbench_model, nfolds=10)
+         predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
+         worst_samples = predictions_df.nlargest(1000, "residual_abs")
+         worst_ids = worst_samples[id_col].values
+         loo_indices = df[df[id_col].isin(worst_ids)].index.values
+         log.important(f"Running leave-one-out CV on 1000 worst samples. Each model trains on {len(df)-1} rows...")
+     else:
+         log.important(f"Running leave-one-out CV on all {len(df)} samples...")
+         loo_indices = df.index.values
+
+     # Prepare full dataset for training
+     X_full = df[feature_cols].values
+     y_full = df[target_col].values
+
+     # Encode target if classifier
+     label_encoder = LabelEncoder() if is_classifier else None
+     if label_encoder:
+         y_full = label_encoder.fit_transform(y_full)
+
+     # Generate LOO splits
+     splits = []
+     for loo_idx in loo_indices:
+         train_idx = np.delete(np.arange(len(X_full)), loo_idx)
+         val_idx = np.array([loo_idx])
+         splits.append((train_idx, val_idx))
+
+     # Parallel execution
+     predictions = Parallel(n_jobs=4)(
+         delayed(train_and_predict_one)(model_params, is_classifier, X_full, y_full, train_idx, val_idx)
+         for train_idx, val_idx in tqdm(splits, desc="LOO CV")
+     )
+
+     # Build results dataframe
+     predictions_array = np.array(predictions)
+     if label_encoder:
+         predictions_array = label_encoder.inverse_transform(predictions_array.astype(int))
+
+     predictions_df = pd.DataFrame(
+         {
+             id_col: df.loc[loo_indices, id_col].values,
+             target_col: df.loc[loo_indices, target_col].values,
+             "prediction": predictions_array,
+         }
+     )
+
+     predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
+
+     return predictions_df


  if __name__ == "__main__":
      """Exercise the Model Utilities"""
-     from workbench.api import Model, FeatureSet
+     from workbench.api import Model
      from pprint import pprint

      # Test the XGBoost model loading and feature importance
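
A sketch (not part of the package) of consuming the new return shapes; the model name comes from the __main__ examples below and assumes a deployed Workbench model:

    from workbench.api import Model
    from workbench.utils.xgboost_model_utils import cross_fold_inference, leave_one_out_inference

    model = Model("abalone-regression")

    # cross_fold_inference() now returns (metrics, out-of-fold predictions) instead of a single dict
    metrics, oof_df = cross_fold_inference(model, nfolds=5)
    print(metrics["summary_metrics"])  # per-metric "mean ±std" strings
    print(oof_df.head())               # columns: <id column>, <target>, "prediction"

    # leave_one_out_inference() returns per-sample predictions plus an absolute-residual column
    loo_df = leave_one_out_inference(model)
    print(loo_df.nlargest(5, "residual_abs"))  # the five worst-predicted samples
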
@@ -383,11 +524,28 @@
      model_artifact_uri = model.model_data_url()
      xgb_model = xgboost_model_from_s3(model_artifact_uri)

+     # Verify enable_categorical is preserved (for debugging/confidence)
+     print(f"Model parameters: {xgb_model.get_params()}")
+     print(f"enable_categorical: {xgb_model.enable_categorical}")
+
      # Test with UQ Model
      uq_model = Model("aqsol-uq")
      _xgb_model = xgboost_model_from_s3(uq_model.model_data_url())

+     print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
+     model = Model("abalone-regression")
+     results, df = cross_fold_inference(model)
+     pprint(results)
+     print(df.head())
+
+     print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
+     model = Model("wine-classification")
+     results, df = cross_fold_inference(model)
+     pprint(results)
+     print(df.head())
+
      # Test XGBoost add_leaf_hash
+     """
      input_df = FeatureSet(model.get_input()).pull_dataframe()
      leaf_df = add_leaf_hash(model, input_df)
      print("DataFrame with Leaf Hash:")
@@ -404,13 +562,4 @@ if __name__ == "__main__":
      stats_df = leaf_stats(leaf_df, target_col)
      print("DataFrame with Leaf Statistics:")
      print(stats_df)
-
-     print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
-     model = Model("abalone-regression")
-     results = cross_fold_inference(model)
-     pprint(results)
-
-     print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
-     model = Model("wine-classification")
-     results = cross_fold_inference(model)
-     pprint(results)
+     """
@@ -39,7 +39,13 @@ class ModelPlot(ComponentInterface):
              # Calculate the distance from the diagonal for each point
              target = model.target()
              df["error"] = abs(df["prediction"] - df[target])
-             return ScatterPlot().update_properties(df, color="error", regression_line=True)[0]
+             return ScatterPlot().update_properties(
+                 df,
+                 color="error",
+                 regression_line=True,
+                 x=target,
+                 y="prediction",
+             )[0]
          else:
              return self.display_text(f"Model Type: {model.model_type}\n\n Awesome Plot Coming Soon!")

@@ -72,7 +72,9 @@ class DashboardStatus(PluginInterface):
              details = "**Redis:** 🔴 Failed to Connect<br>"

          # Fill in the license details
-         details += f"**Redis Server:** {config_info['REDIS_HOST']}:{config_info.get('REDIS_PORT', 6379)}<br>"
+         redis_host = config_info.get("REDIS_HOST", "NOT SET")
+         redis_port = config_info.get("REDIS_PORT", "NOT SET")
+         details += f"**Redis Server:** {redis_host}:{redis_port}<br>"
          details += f"**Workbench S3 Bucket:** {config_info['WORKBENCH_BUCKET']}<br>"
          details += f"**Plugin Path:** {config_info.get('WORKBENCH_PLUGINS', 'unknown')}<br>"
          details += f"**Themes Path:** {config_info.get('WORKBENCH_THEMES', 'unknown')}<br>"
@@ -5,7 +5,7 @@ import dash_bootstrap_components as dbc

  # Workbench Imports
  from workbench.api.compound import Compound
- from workbench.utils.chem_utils import svg_from_smiles
+ from workbench.utils.chem_utils.vis import svg_from_smiles
  from workbench.web_interface.components.plugin_interface import PluginInterface, PluginPage, PluginInputType
  from workbench.utils.theme_manager import ThemeManager
  from workbench.utils.ai_summary import AISummary
@@ -249,8 +249,13 @@ class ModelDetails(PluginInterface):
          if not inference_runs:
              return [], None

-         # Set "auto_inference" as the default, if that doesn't exist, set the first
-         default_inference_run = "auto_inference" if "auto_inference" in inference_runs else inference_runs[0]
+         # Default inference run (full_cross_fold if it exists, then auto_inference, then first)
+         if "full_cross_fold" in inference_runs:
+             default_inference_run = "full_cross_fold"
+         elif "auto_inference" in inference_runs:
+             default_inference_run = "auto_inference"
+         else:
+             default_inference_run = inference_runs[0]

          # Return the options for the dropdown and the selected value
          return inference_runs, default_inference_run
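
The new default amounts to a small preference list; a toy sketch of the same selection order (run names other than the two preferred ones are made up):

    def pick_default(inference_runs):
        # Precedence: full_cross_fold, then auto_inference, then whatever comes first
        for preferred in ("full_cross_fold", "auto_inference"):
            if preferred in inference_runs:
                return preferred
        return inference_runs[0]

    print(pick_default(["auto_inference", "nightly"]))   # auto_inference
    print(pick_default(["nightly", "full_cross_fold"]))  # full_cross_fold
    print(pick_default(["nightly", "ad_hoc"]))           # nightly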