workbench 0.8.193__py3-none-any.whl → 0.8.197__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
- workbench/algorithms/dataframe/proximity.py +212 -234
- workbench/algorithms/graph/light/proximity_graph.py +8 -7
- workbench/api/endpoint.py +2 -3
- workbench/api/model.py +2 -5
- workbench/core/artifacts/endpoint_core.py +25 -16
- workbench/core/artifacts/feature_set_core.py +126 -4
- workbench/core/artifacts/model_core.py +9 -14
- workbench/core/transforms/features_to_model/features_to_model.py +3 -3
- workbench/core/views/training_view.py +75 -0
- workbench/core/views/view.py +1 -1
- workbench/model_scripts/custom_models/proximity/proximity.py +212 -234
- workbench/model_scripts/custom_models/uq_models/proximity.py +212 -234
- workbench/model_scripts/pytorch_model/generated_model_script.py +567 -0
- workbench/model_scripts/uq_models/generated_model_script.py +589 -0
- workbench/model_scripts/uq_models/mapie.template +103 -6
- workbench/model_scripts/xgb_model/generated_model_script.py +4 -4
- workbench/repl/workbench_shell.py +3 -3
- workbench/utils/model_utils.py +10 -7
- workbench/utils/xgboost_model_utils.py +93 -34
- workbench/web_interface/components/plugin_unit_test.py +5 -2
- workbench/web_interface/components/plugins/model_details.py +2 -5
- {workbench-0.8.193.dist-info → workbench-0.8.197.dist-info}/METADATA +1 -1
- {workbench-0.8.193.dist-info → workbench-0.8.197.dist-info}/RECORD +29 -27
- {workbench-0.8.193.dist-info → workbench-0.8.197.dist-info}/WHEEL +0 -0
- {workbench-0.8.193.dist-info → workbench-0.8.197.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.193.dist-info → workbench-0.8.197.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.193.dist-info → workbench-0.8.197.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,7 @@ import joblib
 import os
 import numpy as np
 import pandas as pd
-from typing import List, Tuple
+from typing import List, Tuple, Optional, Dict
 
 # Template Placeholders
 TEMPLATE_PARAMS = {
@@ -26,6 +26,46 @@ TEMPLATE_PARAMS = {
 }
 
 
+def compute_confidence(
+    df: pd.DataFrame,
+    median_interval_width: float,
+    lower_q: str = "q_10",
+    upper_q: str = "q_90",
+    alpha: float = 1.0,
+    beta: float = 1.0,
+) -> pd.DataFrame:
+    """
+    Compute confidence scores (0.0 to 1.0) based on prediction interval width
+    and distance from median using exponential decay.
+
+    Args:
+        df: DataFrame with 'prediction', 'q_50', and quantile columns
+        median_interval_width: Pre-computed median interval width from training data
+        lower_q: Lower quantile column name (default: 'q_10')
+        upper_q: Upper quantile column name (default: 'q_90')
+        alpha: Weight for interval width term (default: 1.0)
+        beta: Weight for distance from median term (default: 1.0)
+
+    Returns:
+        DataFrame with added 'confidence' column
+    """
+    # Interval width
+    interval_width = (df[upper_q] - df[lower_q]).abs()
+
+    # Distance from median, normalized by interval width
+    distance_from_median = (df['prediction'] - df['q_50']).abs()
+    normalized_distance = distance_from_median / (interval_width + 1e-6)
+
+    # Cap the distance penalty at 1.0
+    normalized_distance = np.minimum(normalized_distance, 1.0)
+
+    # Confidence using exponential decay
+    interval_term = interval_width / median_interval_width
+    df['confidence'] = np.exp(-(alpha * interval_term + beta * normalized_distance))
+
+    return df
+
+
 # Function to check if dataframe is empty
 def check_dataframe(df: pd.DataFrame, df_name: str) -> None:
     """
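
Aside: the score this adds works out to confidence = exp(-(alpha * width / median_width + beta * min(|prediction - q_50| / width, 1))), so a prediction whose interval matches the training-median width and sits exactly on q_50 scores exp(-1) ≈ 0.37, and narrower, better-centered intervals score higher. A minimal sketch of the same arithmetic on toy data (all values below are illustrative, not from the package):

```python
import numpy as np
import pandas as pd

# Two toy rows: a narrow, well-centered interval vs. a wide, off-center one
df = pd.DataFrame({
    "prediction": [5.0, 5.0],
    "q_10": [4.8, 3.0],
    "q_50": [5.0, 6.0],
    "q_90": [5.2, 9.0],
})
median_interval_width = 1.0  # stand-in for the value computed at training time

width = (df["q_90"] - df["q_10"]).abs()
dist = np.minimum((df["prediction"] - df["q_50"]).abs() / (width + 1e-6), 1.0)
df["confidence"] = np.exp(-(width / median_interval_width + dist))
print(df["confidence"].round(3).tolist())  # [0.67, 0.002]: narrow beats wide
```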
@@ -98,7 +138,7 @@ def convert_categorical_types(df: pd.DataFrame, features: list, category_mapping
 
 
 def decompress_features(
-
+    df: pd.DataFrame, features: List[str], compressed_features: List[str]
 ) -> Tuple[pd.DataFrame, List[str]]:
     """Prepare features for the model by decompressing bitstring features
 
@@ -302,6 +342,46 @@ if __name__ == "__main__":
         widths = y_pis[:, 1, 0] - y_pis[:, 0, 0]
         print(f" {conf_level * 100:.0f}% CI: Mean width={np.mean(widths):.3f}, Std={np.std(widths):.3f}")
 
+    # Compute normalization statistics for confidence calculation
+    print(f"\nComputing normalization statistics for confidence scores...")
+
+    # Create a temporary validation dataframe with predictions
+    temp_val_df = df_val.copy()
+    temp_val_df["prediction"] = xgb_model.predict(X_validate)
+
+    # Add all quantile predictions
+    for conf_level in confidence_levels:
+        model_name = f"mapie_{conf_level:.2f}"
+        model = mapie_models[model_name]
+        y_pred, y_pis = model.predict_interval(X_validate)
+
+        if conf_level == 0.50:
+            temp_val_df["q_25"] = y_pis[:, 0, 0]
+            temp_val_df["q_75"] = y_pis[:, 1, 0]
+            # y_pred is the median prediction
+            temp_val_df["q_50"] = y_pred
+        elif conf_level == 0.68:
+            temp_val_df["q_16"] = y_pis[:, 0, 0]
+            temp_val_df["q_84"] = y_pis[:, 1, 0]
+        elif conf_level == 0.80:
+            temp_val_df["q_10"] = y_pis[:, 0, 0]
+            temp_val_df["q_90"] = y_pis[:, 1, 0]
+        elif conf_level == 0.90:
+            temp_val_df["q_05"] = y_pis[:, 0, 0]
+            temp_val_df["q_95"] = y_pis[:, 1, 0]
+        elif conf_level == 0.95:
+            temp_val_df["q_025"] = y_pis[:, 0, 0]
+            temp_val_df["q_975"] = y_pis[:, 1, 0]
+
+    # Compute normalization stats using q_10 and q_90 (default range)
+    interval_width = (temp_val_df["q_90"] - temp_val_df["q_10"]).abs()
+    median_interval_width = float(interval_width.median())
+    print(f" Median interval width (q_10-q_90): {median_interval_width:.6f}")
+
+    # Save median interval width for confidence calculation
+    with open(os.path.join(args.model_dir, "median_interval_width.json"), "w") as fp:
+        json.dump(median_interval_width, fp)
+
     # Save the trained XGBoost model
     joblib.dump(xgb_model, os.path.join(args.model_dir, "xgb_model.joblib"))
 
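
Aside: the `y_pis[:, 0, 0]` / `y_pis[:, 1, 0]` indexing assumes MAPIE-style interval output of shape `(n_samples, 2, n_confidence_levels)`, with the lower bound at index 0 and the upper bound at index 1 of the middle axis. A small numpy sketch of that layout (the array is fabricated for illustration):

```python
import numpy as np

# Fake interval output for 3 samples at one confidence level: shape (3, 2, 1)
y_pis = np.array([
    [[4.2], [5.8]],
    [[1.0], [2.5]],
    [[7.7], [9.9]],
])

lower = y_pis[:, 0, 0]  # [4.2, 1.0, 7.7]
upper = y_pis[:, 1, 0]  # [5.8, 2.5, 9.9]
print(upper - lower)    # per-sample interval widths
```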
@@ -365,11 +445,19 @@ def model_fn(model_dir) -> dict:
         with open(category_path) as fp:
             category_mappings = json.load(fp)
 
+    # Load median interval width for confidence calculation
+    median_interval_width = None
+    median_width_path = os.path.join(model_dir, "median_interval_width.json")
+    if os.path.exists(median_width_path):
+        with open(median_width_path) as fp:
+            median_interval_width = json.load(fp)
+
     return {
         "xgb_model": xgb_model,
         "mapie_models": mapie_models,
         "confidence_levels": config["confidence_levels"],
         "category_mappings": category_mappings,
+        "median_interval_width": median_interval_width,
     }
 
 
@@ -449,6 +537,8 @@ def predict_fn(df, models) -> pd.DataFrame:
         if conf_level == 0.50:  # 50% CI
             df["q_25"] = y_pis[:, 0, 0]
             df["q_75"] = y_pis[:, 1, 0]
+            # y_pred is the median prediction
+            df["q_50"] = y_pred
         elif conf_level == 0.68:  # 68% CI
             df["q_16"] = y_pis[:, 0, 0]
             df["q_84"] = y_pis[:, 1, 0]
@@ -462,14 +552,11 @@ def predict_fn(df, models) -> pd.DataFrame:
             df["q_025"] = y_pis[:, 0, 0]
             df["q_975"] = y_pis[:, 1, 0]
 
-    # Add median (q_50) from XGBoost prediction
-    df["q_50"] = df["prediction"]
-
     # Calculate a pseudo-standard deviation from the 68% interval width
     df["prediction_std"] = (df["q_84"] - df["q_16"]).abs() / 2.0
 
     # Reorder the quantile columns for easier reading
-    quantile_cols = ["q_025", "q_05", "q_10", "q_16", "q_25", "q_75", "q_84", "q_90", "q_95", "q_975"]
+    quantile_cols = ["q_025", "q_05", "q_10", "q_16", "q_25", "q_50", "q_75", "q_84", "q_90", "q_95", "q_975"]
     other_cols = [col for col in df.columns if col not in quantile_cols]
     df = df[other_cols + quantile_cols]
 
@@ -489,4 +576,14 @@ def predict_fn(df, models) -> pd.DataFrame:
     df["q_95"] = np.maximum(df["q_95"], df["prediction"])
     df["q_975"] = np.maximum(df["q_975"], df["prediction"])
 
+    # Compute confidence scores using pre-computed normalization stats
+    df = compute_confidence(
+        df,
+        lower_q="q_10",
+        upper_q="q_90",
+        alpha=1.0,
+        beta=1.0,
+        median_interval_width=models["median_interval_width"],
+    )
+
     return df
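
Aside: the `np.maximum` clamps above keep the upper quantiles from falling below the point prediction after the intervals are assembled (the lower quantiles presumably get the mirror-image `np.minimum` treatment earlier in the function). A toy illustration of the clamp:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"prediction": [5.0, 5.0], "q_95": [5.5, 4.9]})

# An upper quantile below the point prediction is inconsistent; clamp it up
df["q_95"] = np.maximum(df["q_95"], df["prediction"])
print(df["q_95"].tolist())  # [5.5, 5.0] -- only the inconsistent row moves
```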
@@ -28,11 +28,11 @@ from typing import List, Tuple
 
 # Template Parameters
 TEMPLATE_PARAMS = {
-    "model_type": "
-    "target": "
-    "features": ['
+    "model_type": "classifier",
+    "target": "wine_class",
+    "features": ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280_od315_of_diluted_wines', 'proline'],
     "compressed_features": [],
-    "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/
+    "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/wine-classification/training",
     "train_all_data": False,
     "hyperparameters": {},
 }
@@ -525,7 +525,7 @@ class WorkbenchShell:
     def get_meta(self):
         return self.meta
 
-    def plot_manager(self, data, plot_type: str = "
+    def plot_manager(self, data, plot_type: str = "scatter", **kwargs):
         """Plot Manager for Workbench"""
         from workbench.web_interface.components.plugins import ag_table, graph_plot, scatter_plot
 
@@ -564,10 +564,10 @@ class WorkbenchShell:
 
         plugin_test = PluginUnitTest(plugin_class, theme=theme, input_data=data, **kwargs)
 
-        #
-        plugin_test.run()
+        # Open the browser and run the dash server
         url = f"http://127.0.0.1:{plugin_test.port}"
         webbrowser.open(url)
+        plugin_test.run()
 
 
 # Launch Shell Entry Point
workbench/utils/model_utils.py  CHANGED
@@ -113,9 +113,16 @@ def proximity_model_local(model: "Model"):
     fs = FeatureSet(model.get_input())
     id_column = fs.id_column
 
-    # Create the Proximity Model from
-
-
+    # Create the Proximity Model from both the full FeatureSet and the Model training data
+    full_df = fs.pull_dataframe()
+    model_df = model.training_view().pull_dataframe()
+
+    # Mark rows that are in the model
+    model_ids = set(model_df[id_column])
+    full_df["in_model"] = full_df[id_column].isin(model_ids)
+
+    # Create and return the Proximity Model
+    return Proximity(full_df, id_column, features, target, track_columns=features)
 
 
 def proximity_model(model: "Model", prox_model_name: str, track_columns: list = None) -> "Model":
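
Aside: the in_model flag is a plain set-membership test, letting downstream proximity queries tell FeatureSet rows the model actually trained on apart from held-out rows. A toy version of the same marking (column names are illustrative):

```python
import pandas as pd

full_df = pd.DataFrame({"id": [1, 2, 3, 4], "feat": [0.1, 0.2, 0.3, 0.4]})
model_df = pd.DataFrame({"id": [1, 3]})  # rows that went into training

model_ids = set(model_df["id"])
full_df["in_model"] = full_df["id"].isin(model_ids)
print(full_df)  # ids 1 and 3 come back True, ids 2 and 4 False
```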
@@ -165,9 +172,6 @@ def uq_model(model: "Model", uq_model_name: str, train_all_data: bool = False) -
     """
     from workbench.api import Model, ModelType, FeatureSet  # noqa: F401 (avoid circular import)
 
-    # Get the custom script path for the UQ model
-    script_path = get_custom_script_path("uq_models", "mapie.template")
-
     # Get Feature and Target Columns from the existing given Model
     features = model.features()
     target = model.target()
@@ -182,7 +186,6 @@ def uq_model(model: "Model", uq_model_name: str, train_all_data: bool = False) -
         description=f"UQ Model for {model.name}",
         tags=["uq", model.name],
         train_all_data=train_all_data,
-        custom_script=script_path,
         custom_args={"id_column": fs.id_column, "track_columns": [target]},
     )
     return uq_model
@@ -7,12 +7,11 @@ import joblib
 import pickle
 import glob
 import awswrangler as wr
-from typing import Optional, List, Tuple
+from typing import Optional, List, Tuple, Any
 import hashlib
 import pandas as pd
 import numpy as np
 import xgboost as xgb
-from typing import Dict, Any
 from sklearn.model_selection import KFold, StratifiedKFold
 from sklearn.metrics import (
     precision_recall_fscore_support,
@@ -20,13 +19,14 @@ from sklearn.metrics (
     mean_absolute_error,
     r2_score,
     median_absolute_error,
+    roc_auc_score,
 )
 from scipy.stats import spearmanr
 from sklearn.preprocessing import LabelEncoder
 
 # Workbench Imports
 from workbench.utils.model_utils import load_category_mappings_from_s3, safe_extract_tarfile
-from workbench.utils.pandas_utils import convert_categorical_types
+from workbench.utils.pandas_utils import convert_categorical_types, expand_proba_column
 
 # Set up the log
 log = logging.getLogger("workbench")
@@ -258,7 +258,7 @@ def leaf_stats(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
     return result_df
 
 
-def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[
+def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
     Performs K-fold cross-validation with detailed metrics.
     Args:
@@ -266,10 +266,8 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
         nfolds: Number of folds for cross-validation (default is 5)
     Returns:
         Tuple of:
-        -
-
-        - summary_metrics: Summary metrics across folds
-        - DataFrame with columns: id, target, prediction (out-of-fold predictions for all samples)
+        - DataFrame with per-class metrics (and 'all' row for overall metrics)
+        - DataFrame with columns: id, target, prediction, and *_proba columns (for classifiers)
     """
     from workbench.api import FeatureSet
 
@@ -278,7 +276,7 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
     loaded_model = xgboost_model_from_s3(model_artifact_uri)
     if loaded_model is None:
         log.error("No XGBoost model found in the artifact.")
-        return
+        return pd.DataFrame(), pd.DataFrame()
 
     # Check if we got a full sklearn model or need to create one
     if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
@@ -304,7 +302,7 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
         xgb_model._Booster = loaded_model
     else:
         log.error(f"Unexpected model type: {type(loaded_model)}")
-        return
+        return pd.DataFrame(), pd.DataFrame()
 
     # Prepare data
     fs = FeatureSet(workbench_model.get_input())
@@ -335,12 +333,12 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
         y_for_cv = y
 
     # Prepare KFold
+    # Note: random_state=42 seems to not actually give us reproducible results
     kfold = (StratifiedKFold if is_classifier else KFold)(n_splits=nfolds, shuffle=True, random_state=42)
 
     # Initialize results collection
     fold_metrics = []
-    predictions_df = pd.DataFrame({id_col: ids, target_col: y})
-    # Note: 'prediction' column will be created automatically with correct dtype
+    predictions_df = pd.DataFrame({id_col: ids, target_col: y})
 
     # Perform cross-validation
     for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y_for_cv), 1):
@@ -355,6 +353,8 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
         val_indices = X_val.index
         if is_classifier:
             predictions_df.loc[val_indices, "prediction"] = label_encoder.inverse_transform(preds.astype(int))
+            y_proba = xgb_model.predict_proba(X_val)
+            predictions_df.loc[val_indices, "pred_proba"] = pd.Series(y_proba.tolist(), index=val_indices)
         else:
             predictions_df.loc[val_indices, "prediction"] = preds
 
@@ -362,10 +362,34 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
         if is_classifier:
             y_val_orig = label_encoder.inverse_transform(y_val)
             preds_orig = label_encoder.inverse_transform(preds.astype(int))
+
+            # Overall weighted metrics
             prec, rec, f1, _ = precision_recall_fscore_support(
                 y_val_orig, preds_orig, average="weighted", zero_division=0
             )
-
+
+            # Per-class F1
+            prec_per_class, rec_per_class, f1_per_class, _ = precision_recall_fscore_support(
+                y_val_orig, preds_orig, average=None, zero_division=0, labels=label_encoder.classes_
+            )
+
+            # ROC-AUC (overall and per-class)
+            roc_auc_overall = roc_auc_score(y_val, y_proba, multi_class="ovr", average="macro")
+            roc_auc_per_class = roc_auc_score(y_val, y_proba, multi_class="ovr", average=None)
+
+            fold_metrics.append(
+                {
+                    "fold": fold_idx,
+                    "precision": prec,
+                    "recall": rec,
+                    "f1": f1,
+                    "roc_auc": roc_auc_overall,
+                    "precision_per_class": prec_per_class,
+                    "recall_per_class": rec_per_class,
+                    "f1_per_class": f1_per_class,
+                    "roc_auc_per_class": roc_auc_per_class,
+                }
+            )
         else:
             spearman_corr, _ = spearmanr(y_val, preds)
             fold_metrics.append(
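
Aside: the per-class ROC-AUC call relies on scikit-learn accepting average=None together with multi_class="ovr" for multiclass targets (scikit-learn 1.2+, if memory serves). A self-contained sketch with fabricated labels and probabilities:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1, 2, 2])
y_proba = np.array([
    [0.8, 0.1, 0.1],
    [0.6, 0.3, 0.1],
    [0.2, 0.7, 0.1],
    [0.3, 0.5, 0.2],
    [0.1, 0.2, 0.7],
    [0.2, 0.2, 0.6],
])

print(roc_auc_score(y_true, y_proba, multi_class="ovr", average="macro"))  # one overall score
print(roc_auc_score(y_true, y_proba, multi_class="ovr", average=None))    # one score per class
```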
@@ -379,32 +403,67 @@ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[st
                 }
             )
 
-    # Calculate summary metrics
+    # Calculate summary metrics
     fold_df = pd.DataFrame(fold_metrics)
-    metric_names = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "medae", "r2", "spearmanr"]
-    summary_metrics = {metric: f"{fold_df[metric].mean():.3f} ±{fold_df[metric].std():.3f}" for metric in metric_names}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if is_classifier:
+        # Expand the *_proba columns into separate columns for easier handling
+        predictions_df = expand_proba_column(predictions_df, label_encoder.classes_)
+
+        # Build per-class metrics DataFrame
+        metric_rows = []
+
+        # Per-class rows
+        for idx, class_name in enumerate(label_encoder.classes_):
+            prec_scores = np.array([fold["precision_per_class"][idx] for fold in fold_metrics])
+            rec_scores = np.array([fold["recall_per_class"][idx] for fold in fold_metrics])
+            f1_scores = np.array([fold["f1_per_class"][idx] for fold in fold_metrics])
+            roc_auc_scores = np.array([fold["roc_auc_per_class"][idx] for fold in fold_metrics])
+
+            y_orig = label_encoder.inverse_transform(y_for_cv)
+            support = int((y_orig == class_name).sum())
+
+            metric_rows.append(
+                {
+                    "class": class_name,
+                    "precision": prec_scores.mean(),
+                    "recall": rec_scores.mean(),
+                    "f1": f1_scores.mean(),
+                    "roc_auc": roc_auc_scores.mean(),
+                    "support": support,
+                }
             )
 
-
-
+        # Overall 'all' row
+        metric_rows.append(
+            {
+                "class": "all",
+                "precision": fold_df["precision"].mean(),
+                "recall": fold_df["recall"].mean(),
+                "f1": fold_df["f1"].mean(),
+                "roc_auc": fold_df["roc_auc"].mean(),
+                "support": len(y_for_cv),
+            }
+        )
+
+        metrics_df = pd.DataFrame(metric_rows)
+
+    else:
+        # Regression metrics
+        metrics_df = pd.DataFrame(
+            [
+                {
+                    "rmse": fold_df["rmse"].mean(),
+                    "mae": fold_df["mae"].mean(),
+                    "medae": fold_df["medae"].mean(),
+                    "r2": fold_df["r2"].mean(),
+                    "spearmanr": fold_df["spearmanr"].mean(),
+                    "support": len(y_for_cv),
+                }
+            ]
+        )
 
-    return
+    return metrics_df, predictions_df
 
 
 def leave_one_out_inference(workbench_model: Any) -> pd.DataFrame:
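
Aside: with the new signature a caller gets both frames back directly; a hedged usage sketch (the model name is hypothetical, and the import path assumes these helpers stay in workbench/utils/xgboost_model_utils.py):

```python
from workbench.api import Model
from workbench.utils.xgboost_model_utils import cross_fold_inference

model = Model("wine-classification")  # hypothetical model name
metrics_df, predictions_df = cross_fold_inference(model, nfolds=5)

print(metrics_df)             # per-class rows plus an 'all' summary row
print(predictions_df.head())  # id, target, prediction, and *_proba columns
```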
@@ -156,10 +156,13 @@ class PluginUnitTest:
         """Run the Dash server for the plugin, handling common errors gracefully."""
         while self.is_port_in_use(self.port):
             log.info(f"Port {self.port} is in use. Trying the next one...")
-            self.port += 1
+            self.port += 1
 
         log.info(f"Starting Dash server on port {self.port}...")
-
+        try:
+            self.app.run(debug=True, use_reloader=False, port=self.port)
+        except KeyboardInterrupt:
+            log.info("Shutting down Dash server...")
 
     @staticmethod
     def is_port_in_use(port):
@@ -45,8 +45,6 @@ class ModelDetails(PluginInterface):
                 html.H5(children="Inference Metrics", style={"marginTop": "20px"}),
                 dcc.Dropdown(id=f"{self.component_id}-dropdown", className="dropdown"),
                 dcc.Markdown(id=f"{self.component_id}-metrics"),
-                html.H5(children="Cross Fold Metrics", style={"marginTop": "20px"}),
-                dcc.Markdown(id=f"{self.component_id}-cross-metrics", dangerously_allow_html=True),
             ],
         )
 
@@ -57,7 +55,6 @@ class ModelDetails(PluginInterface):
             (f"{self.component_id}-dropdown", "options"),
             (f"{self.component_id}-dropdown", "value"),
             (f"{self.component_id}-metrics", "children"),
-            (f"{self.component_id}-cross-metrics", "children"),
         ]
         self.signals = [(f"{self.component_id}-dropdown", "value")]
 
@@ -84,10 +81,9 @@ class ModelDetails(PluginInterface):
         # Populate the inference runs dropdown
         inference_runs, default_run = self.get_inference_runs()
         metrics = self.inference_metrics(default_run)
-        cross_metrics = self.cross_metrics()
 
         # Return the updated property values for the plugin
-        return [header, details, inference_runs, default_run, metrics
+        return [header, details, inference_runs, default_run, metrics]
 
     def register_internal_callbacks(self):
         @callback(
@@ -225,6 +221,7 @@ class ModelDetails(PluginInterface):
 
     def cross_metrics(self) -> str:
         # Get cross fold metrics if they exist
+        # Note: Currently not used since we show cross fold metrics in the dropdown
         model_name = self.current_model.name
        cross_fold_data = self.params.get(f"/workbench/models/{model_name}/inference/cross_fold", warn=False)
         if not cross_fold_data:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.193
+Version: 0.8.197
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License: MIT License