workbench 0.8.168__py3-none-any.whl → 0.8.193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. workbench/algorithms/dataframe/proximity.py +143 -102
  2. workbench/algorithms/graph/light/proximity_graph.py +2 -1
  3. workbench/api/compound.py +1 -1
  4. workbench/api/endpoint.py +3 -2
  5. workbench/api/feature_set.py +4 -4
  6. workbench/api/model.py +16 -12
  7. workbench/api/monitor.py +1 -16
  8. workbench/core/artifacts/artifact.py +11 -3
  9. workbench/core/artifacts/data_capture_core.py +355 -0
  10. workbench/core/artifacts/endpoint_core.py +113 -27
  11. workbench/core/artifacts/feature_set_core.py +72 -13
  12. workbench/core/artifacts/model_core.py +71 -49
  13. workbench/core/artifacts/monitor_core.py +33 -249
  14. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  15. workbench/core/cloud_platform/aws/aws_meta.py +11 -4
  16. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  17. workbench/core/transforms/features_to_model/features_to_model.py +11 -6
  18. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  19. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  20. workbench/core/views/training_view.py +49 -53
  21. workbench/core/views/view.py +51 -1
  22. workbench/core/views/view_utils.py +4 -4
  23. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  24. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  25. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  26. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  27. workbench/model_scripts/custom_models/proximity/proximity.py +143 -102
  28. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  29. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +10 -17
  30. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  31. workbench/model_scripts/custom_models/uq_models/meta_uq.template +156 -58
  32. workbench/model_scripts/custom_models/uq_models/ngboost.template +20 -14
  33. workbench/model_scripts/custom_models/uq_models/proximity.py +143 -102
  34. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  35. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +5 -13
  36. workbench/model_scripts/pytorch_model/pytorch.template +9 -18
  37. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  38. workbench/model_scripts/script_generation.py +7 -2
  39. workbench/model_scripts/uq_models/mapie.template +492 -0
  40. workbench/model_scripts/uq_models/requirements.txt +1 -0
  41. workbench/model_scripts/xgb_model/generated_model_script.py +34 -43
  42. workbench/model_scripts/xgb_model/xgb_model.template +31 -40
  43. workbench/repl/workbench_shell.py +4 -4
  44. workbench/scripts/lambda_launcher.py +63 -0
  45. workbench/scripts/{ml_pipeline_launcher.py → ml_pipeline_batch.py} +49 -51
  46. workbench/scripts/ml_pipeline_sqs.py +186 -0
  47. workbench/utils/chem_utils/__init__.py +0 -0
  48. workbench/utils/chem_utils/fingerprints.py +134 -0
  49. workbench/utils/chem_utils/misc.py +194 -0
  50. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  51. workbench/utils/chem_utils/mol_standardize.py +450 -0
  52. workbench/utils/chem_utils/mol_tagging.py +348 -0
  53. workbench/utils/chem_utils/projections.py +209 -0
  54. workbench/utils/chem_utils/salts.py +256 -0
  55. workbench/utils/chem_utils/sdf.py +292 -0
  56. workbench/utils/chem_utils/toxicity.py +250 -0
  57. workbench/utils/chem_utils/vis.py +253 -0
  58. workbench/utils/config_manager.py +2 -6
  59. workbench/utils/endpoint_utils.py +5 -7
  60. workbench/utils/license_manager.py +2 -6
  61. workbench/utils/model_utils.py +89 -31
  62. workbench/utils/monitor_utils.py +44 -62
  63. workbench/utils/pandas_utils.py +3 -3
  64. workbench/utils/shap_utils.py +10 -2
  65. workbench/utils/workbench_sqs.py +1 -1
  66. workbench/utils/xgboost_model_utils.py +300 -151
  67. workbench/web_interface/components/model_plot.py +7 -1
  68. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  69. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  70. workbench/web_interface/components/plugins/model_details.py +7 -2
  71. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  72. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/METADATA +24 -2
  73. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/RECORD +77 -72
  74. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/entry_points.txt +3 -1
  75. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/licenses/LICENSE +1 -1
  76. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  77. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  78. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  79. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  80. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  81. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  82. workbench/model_scripts/pytorch_model/generated_model_script.py +0 -576
  83. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  84. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  85. workbench/model_scripts/scikit_learn/generated_model_script.py +0 -307
  86. workbench/utils/chem_utils.py +0 -1556
  87. workbench/utils/fast_inference.py +0 -167
  88. workbench/utils/resource_utils.py +0 -39
  89. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/WHEEL +0 -0
  90. {workbench-0.8.168.dist-info → workbench-0.8.193.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@
  import logging
  import os
  import tempfile
- import tarfile
+ import joblib
  import pickle
  import glob
  import awswrangler as wr
@@ -16,15 +16,16 @@ from typing import Dict, Any
  from sklearn.model_selection import KFold, StratifiedKFold
  from sklearn.metrics import (
      precision_recall_fscore_support,
-     confusion_matrix,
      mean_squared_error,
      mean_absolute_error,
      r2_score,
+     median_absolute_error,
  )
+ from scipy.stats import spearmanr
  from sklearn.preprocessing import LabelEncoder

  # Workbench Imports
- from workbench.utils.model_utils import load_category_mappings_from_s3
+ from workbench.utils.model_utils import load_category_mappings_from_s3, safe_extract_tarfile
  from workbench.utils.pandas_utils import convert_categorical_types

  # Set up the log
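
For reference, the two regression metrics newly imported here (median absolute error and Spearman rank correlation) behave as in this small standalone sketch; the arrays are toy data, purely illustrative:

    import numpy as np
    from scipy.stats import spearmanr
    from sklearn.metrics import median_absolute_error

    y_true = np.array([1.0, 2.0, 3.0, 4.0, 10.0])
    y_pred = np.array([1.1, 1.9, 3.2, 4.5, 6.0])

    # MedAE is robust to the single large miss on the last sample
    print(median_absolute_error(y_true, y_pred))  # 0.2
    # spearmanr returns (correlation, p-value); the correlation is rank-based
    corr, _ = spearmanr(y_true, y_pred)
    print(corr)  # 1.0, since the predictions preserve the rank order
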
@@ -34,14 +35,12 @@ log = logging.getLogger("workbench")
  def xgboost_model_from_s3(model_artifact_uri: str):
      """
      Download and extract XGBoost model artifact from S3, then load the model into memory.
-     Handles both direct XGBoost model files and pickled models.
-     Ensures categorical feature support is enabled.

      Args:
          model_artifact_uri (str): S3 URI of the model artifact.

      Returns:
-         Loaded XGBoost model or None if unavailable.
+         Loaded XGBoost model (XGBClassifier, XGBRegressor, or Booster) or None if unavailable.
      """

      with tempfile.TemporaryDirectory() as tmpdir:
@@ -50,68 +49,90 @@ def xgboost_model_from_s3(model_artifact_uri: str):
          wr.s3.download(path=model_artifact_uri, local_file=local_tar_path)

          # Extract tarball
-         with tarfile.open(local_tar_path, "r:gz") as tar:
-             tar.extractall(path=tmpdir, filter="data")
+         safe_extract_tarfile(local_tar_path, tmpdir)

          # Define model file patterns to search for (in order of preference)
          patterns = [
-             # Direct XGBoost model files
-             os.path.join(tmpdir, "xgboost-model"),
-             os.path.join(tmpdir, "model"),
-             os.path.join(tmpdir, "*.bin"),
+             # Joblib models (preferred - preserves everything)
+             os.path.join(tmpdir, "*model*.joblib"),
+             os.path.join(tmpdir, "xgb*.joblib"),
+             os.path.join(tmpdir, "**", "*model*.joblib"),
+             os.path.join(tmpdir, "**", "xgb*.joblib"),
+             # Pickle models (also preserves everything)
+             os.path.join(tmpdir, "*model*.pkl"),
+             os.path.join(tmpdir, "xgb*.pkl"),
+             os.path.join(tmpdir, "**", "*model*.pkl"),
+             os.path.join(tmpdir, "**", "xgb*.pkl"),
+             # JSON models (fallback - requires reconstruction)
+             os.path.join(tmpdir, "*model*.json"),
+             os.path.join(tmpdir, "xgb*.json"),
              os.path.join(tmpdir, "**", "*model*.json"),
-             os.path.join(tmpdir, "**", "rmse.json"),
-             # Pickled models
-             os.path.join(tmpdir, "*.pkl"),
-             os.path.join(tmpdir, "**", "*.pkl"),
-             os.path.join(tmpdir, "*.pickle"),
-             os.path.join(tmpdir, "**", "*.pickle"),
+             os.path.join(tmpdir, "**", "xgb*.json"),
          ]

          # Try each pattern
          for pattern in patterns:
-             # Use glob to find all matching files
              for model_path in glob.glob(pattern, recursive=True):
-                 # Determine file type by extension
+                 # Skip files that are clearly not XGBoost models
+                 filename = os.path.basename(model_path).lower()
+                 if any(skip in filename for skip in ["label_encoder", "scaler", "preprocessor", "transformer"]):
+                     log.debug(f"Skipping non-model file: {model_path}")
+                     continue
+
                  _, ext = os.path.splitext(model_path)

                  try:
-                     if ext.lower() in [".pkl", ".pickle"]:
-                         # Handle pickled models
+                     if ext == ".joblib":
+                         model = joblib.load(model_path)
+                         # Verify it's actually an XGBoost model
+                         if isinstance(model, (xgb.XGBClassifier, xgb.XGBRegressor, xgb.Booster)):
+                             log.important(f"Loaded XGBoost model from joblib: {model_path}")
+                             return model
+                         else:
+                             log.debug(f"Skipping non-XGBoost object from {model_path}: {type(model)}")
+
+                     elif ext in [".pkl", ".pickle"]:
                          with open(model_path, "rb") as f:
                              model = pickle.load(f)
-
-                         # Handle different model types
-                         if isinstance(model, xgb.Booster):
-                             log.important(f"Loaded XGBoost Booster from pickle: {model_path}")
+                         # Verify it's actually an XGBoost model
+                         if isinstance(model, (xgb.XGBClassifier, xgb.XGBRegressor, xgb.Booster)):
+                             log.important(f"Loaded XGBoost model from pickle: {model_path}")
                              return model
-                         elif hasattr(model, "get_booster"):
-                             log.important(f"Loaded XGBoost model from pipeline: {model_path}")
-                             booster = model.get_booster()
-                             return booster
-                     else:
-                         # Handle direct XGBoost model files
+                         else:
+                             log.debug(f"Skipping non-XGBoost object from {model_path}: {type(model)}")
+
+                     elif ext == ".json":
+                         # JSON files should be XGBoost models by definition
                          booster = xgb.Booster()
                          booster.load_model(model_path)
-                         log.important(f"Loaded XGBoost model directly: {model_path}")
+                         log.important(f"Loaded XGBoost booster from JSON: {model_path}")
                          return booster
+
                  except Exception as e:
-                     log.info(f"Failed to load model from {model_path}: {e}")
-                     continue  # Try the next file
+                     log.debug(f"Failed to load {model_path}: {e}")
+                     continue

-     # If no model found
      log.error("No XGBoost model found in the artifact.")
      return None


- def feature_importance(workbench_model, importance_type: str = "weight") -> Optional[List[Tuple[str, float]]]:
+ def feature_importance(workbench_model, importance_type: str = "gain") -> Optional[List[Tuple[str, float]]]:
      """
      Get sorted feature importances from a Workbench Model object.

      Args:
          workbench_model: Workbench model object
-         importance_type: Type of feature importance.
-             Options: 'weight', 'gain', 'cover', 'total_gain', 'total_cover'
+         importance_type: Type of feature importance. Options:
+             - 'gain' (default): Average improvement in loss/objective when feature is used.
+               Best for understanding predictive power of features.
+             - 'weight': Number of times a feature appears in trees (split count).
+               Useful for understanding model complexity and feature usage frequency.
+             - 'cover': Average number of samples affected when feature is used.
+               Shows the relative quantity of observations related to this feature.
+             - 'total_gain': Total improvement in loss/objective across all splits.
+               Similar to 'gain' but not averaged (can be biased toward frequent features).
+             - 'total_cover': Total number of samples affected across all splits.
+               Similar to 'cover' but not averaged.

      Returns:
          List of tuples (feature, importance) sorted by importance value (descending).
@@ -120,7 +141,8 @@ def feature_importance(workbench_model, importance_type: str = "weight") -> Opti

      Note:
          XGBoost's get_score() only returns features with non-zero importance.
-         This function ensures all model features are included in the output.
+         This function ensures all model features are included in the output,
+         adding zero values for features that weren't used in any tree splits.
      """
      model_artifact_uri = workbench_model.model_data_url()
      xgb_model = xgboost_model_from_s3(model_artifact_uri)
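
A minimal usage sketch (not part of the package) for the reworked loader and the new "gain" default; it assumes the "abalone-regression" model used in the __main__ block further down is deployed in the caller's Workbench account:

    from workbench.api import Model
    from workbench.utils.xgboost_model_utils import feature_importance, xgboost_model_from_s3

    model = Model("abalone-regression")  # model name taken from the __main__ examples below
    xgb_model = xgboost_model_from_s3(model.model_data_url())
    # May be an XGBClassifier/XGBRegressor (joblib or pickle artifact) or a bare Booster (legacy JSON)
    print(type(xgb_model))

    # Now defaults to importance_type="gain"; returns None if no XGBoost artifact is found
    importances = feature_importance(model)
    if importances:
        for feat, score in importances[:10]:
            print(f"{feat}: {score:.4f}")
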
@@ -128,11 +150,18 @@ def feature_importance(workbench_model, importance_type: str = "weight") -> Opti
          log.error("No XGBoost model found in the artifact.")
          return None

-     # Get feature importances (only non-zero features)
-     importances = xgb_model.get_score(importance_type=importance_type)
+     # Check if we got a full sklearn model or just a booster (for backwards compatibility)
+     if hasattr(xgb_model, "get_booster"):
+         # Full sklearn model - get the booster for feature importance
+         booster = xgb_model.get_booster()
+         all_features = booster.feature_names
+     else:
+         # Already a booster (legacy JSON load)
+         booster = xgb_model
+         all_features = xgb_model.feature_names

-     # Get all feature names from the model
-     all_features = xgb_model.feature_names
+     # Get feature importances (only non-zero features)
+     importances = booster.get_score(importance_type=importance_type)

      # Create complete importance dict with zeros for missing features
      complete_importances = {feat: importances.get(feat, 0.0) for feat in all_features}
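
A standalone sketch (synthetic data, illustrative only) of why the zero-fill above matters: Booster.get_score() silently omits features that never appear in a split.

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.random((200, 5))
    y = 3.0 * X[:, 0]  # only the first feature carries signal
    dtrain = xgb.DMatrix(X, label=y, feature_names=[f"f{i}" for i in range(5)])
    booster = xgb.train({"max_depth": 2}, dtrain, num_boost_round=10)

    scores = booster.get_score(importance_type="gain")  # typically only contains "f0"
    complete = {f: scores.get(f, 0.0) for f in booster.feature_names}  # zero-filled, as above
    print(complete)
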
@@ -229,148 +258,260 @@ def leaf_stats(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
      return result_df


- def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Dict[str, Any]:
+ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[str, Any], pd.DataFrame]:
      """
      Performs K-fold cross-validation with detailed metrics.
      Args:
          workbench_model: Workbench model object
          nfolds: Number of folds for cross-validation (default is 5)
      Returns:
-         Dictionary containing:
-             - folds: Dictionary of formatted strings for each fold
-             - summary_metrics: Summary metrics across folds
-             - overall_metrics: Overall metrics for all folds
+         Tuple of:
+             - Dictionary containing:
+                 - folds: Dictionary of formatted strings for each fold
+                 - summary_metrics: Summary metrics across folds
+             - DataFrame with columns: id, target, prediction (out-of-fold predictions for all samples)
      """
      from workbench.api import FeatureSet

      # Load model
-     model_type = workbench_model.model_type.value
      model_artifact_uri = workbench_model.model_data_url()
-     loaded_booster = xgboost_model_from_s3(model_artifact_uri)
-     if loaded_booster is None:
+     loaded_model = xgboost_model_from_s3(model_artifact_uri)
+     if loaded_model is None:
          log.error("No XGBoost model found in the artifact.")
-         return {}
-     # Create the model wrapper
-     is_classifier = model_type == "classifier"
-     xgb_model = (
-         xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
-     )
-     xgb_model._Booster = loaded_booster
+         return {}, pd.DataFrame()
+
+     # Check if we got a full sklearn model or need to create one
+     if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
+         is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
+
+         # Get the model's hyperparameters and ensure enable_categorical=True
+         params = loaded_model.get_params()
+         params["enable_categorical"] = True
+
+         # Create new model with same params but enable_categorical=True
+         if is_classifier:
+             xgb_model = xgb.XGBClassifier(**params)
+         else:
+             xgb_model = xgb.XGBRegressor(**params)
+
+     elif isinstance(loaded_model, xgb.Booster):
+         # Legacy: got a booster, need to wrap it
+         log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
+         is_classifier = workbench_model.model_type.value == "classifier"
+         xgb_model = (
+             xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
+         )
+         xgb_model._Booster = loaded_model
+     else:
+         log.error(f"Unexpected model type: {type(loaded_model)}")
+         return {}, pd.DataFrame()
+
      # Prepare data
      fs = FeatureSet(workbench_model.get_input())
-     df = fs.pull_dataframe()
+     df = workbench_model.training_view().pull_dataframe()
+
+     # Get id column - assuming FeatureSet has an id_column attribute or similar
+     id_col = fs.id_column
+     target_col = workbench_model.target()
      feature_cols = workbench_model.features()
-     # Convert string features to categorical
+
+     # Convert string[python] to object, then to category for XGBoost compatibility
+     # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
      for col in feature_cols:
-         if df[col].dtype in ["object", "string"]:
-             df[col] = df[col].astype("category")
-     # Split X and y
-     X = df[workbench_model.features()]
-     y = df[workbench_model.target()]
+         if pd.api.types.is_string_dtype(df[col]):
+             # Double conversion: string[python] -> object -> category
+             df[col] = df[col].astype("object").astype("category")

-     # Encode target if it's a classification problem
+     X = df[feature_cols]
+     y = df[target_col]
+     ids = df[id_col]
+
+     # Encode target if classifier
      label_encoder = LabelEncoder() if is_classifier else None
      if label_encoder:
-         y = pd.Series(label_encoder.fit_transform(y), name=workbench_model.target())
+         y_encoded = label_encoder.fit_transform(y)
+         y_for_cv = pd.Series(y_encoded, index=y.index, name=target_col)
+     else:
+         y_for_cv = y
+
      # Prepare KFold
-     kfold = (
-         StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=42)
-         if is_classifier
-         else KFold(n_splits=nfolds, shuffle=True, random_state=42)
-     )
+     kfold = (StratifiedKFold if is_classifier else KFold)(n_splits=nfolds, shuffle=True, random_state=42)
+
+     # Initialize results collection
+     fold_metrics = []
+     predictions_df = pd.DataFrame({id_col: ids, target_col: y})  # Keep original values
+     # Note: 'prediction' column will be created automatically with correct dtype

-     fold_results = []
-     all_predictions = []
-     all_actuals = []
-     for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y)):
+     # Perform cross-validation
+     for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y_for_cv), 1):
          X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
-         y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
+         y_train, y_val = y_for_cv.iloc[train_idx], y_for_cv.iloc[val_idx]

-         # Train the model
+         # Train and predict
          xgb_model.fit(X_train, y_train)
          preds = xgb_model.predict(X_val)
-         all_predictions.extend(preds)
-         all_actuals.extend(y_val)

-         # Calculate metrics for this fold
-         fold_metrics = {"fold": fold_idx + 1}
+         # Store predictions (decode if classifier)
+         val_indices = X_val.index
+         if is_classifier:
+             predictions_df.loc[val_indices, "prediction"] = label_encoder.inverse_transform(preds.astype(int))
+         else:
+             predictions_df.loc[val_indices, "prediction"] = preds

+         # Calculate fold metrics
          if is_classifier:
-             y_val_original = label_encoder.inverse_transform(y_val)
-             preds_original = label_encoder.inverse_transform(preds.astype(int))
-             scores = precision_recall_fscore_support(
-                 y_val_original, preds_original, average="weighted", zero_division=0
+             y_val_orig = label_encoder.inverse_transform(y_val)
+             preds_orig = label_encoder.inverse_transform(preds.astype(int))
+             prec, rec, f1, _ = precision_recall_fscore_support(
+                 y_val_orig, preds_orig, average="weighted", zero_division=0
              )
-             fold_metrics.update({"precision": float(scores[0]), "recall": float(scores[1]), "fscore": float(scores[2])})
+             fold_metrics.append({"fold": fold_idx, "precision": prec, "recall": rec, "fscore": f1})
          else:
-             fold_metrics.update(
+             spearman_corr, _ = spearmanr(y_val, preds)
+             fold_metrics.append(
                  {
-                     "rmse": float(np.sqrt(mean_squared_error(y_val, preds))),
-                     "mae": float(mean_absolute_error(y_val, preds)),
-                     "r2": float(r2_score(y_val, preds)),
+                     "fold": fold_idx,
+                     "rmse": np.sqrt(mean_squared_error(y_val, preds)),
+                     "mae": mean_absolute_error(y_val, preds),
+                     "medae": median_absolute_error(y_val, preds),
+                     "r2": r2_score(y_val, preds),
+                     "spearmanr": spearman_corr,
                  }
              )

-         fold_results.append(fold_metrics)
-     # Calculate overall metrics
-     overall_metrics = {}
-     if is_classifier:
-         all_actuals_original = label_encoder.inverse_transform(all_actuals)
-         all_predictions_original = label_encoder.inverse_transform(all_predictions)
-         scores = precision_recall_fscore_support(
-             all_actuals_original, all_predictions_original, average="weighted", zero_division=0
-         )
-         overall_metrics.update(
-             {
-                 "precision": float(scores[0]),
-                 "recall": float(scores[1]),
-                 "fscore": float(scores[2]),
-                 "confusion_matrix": confusion_matrix(
-                     all_actuals_original, all_predictions_original, labels=label_encoder.classes_
-                 ).tolist(),
-                 "label_names": list(label_encoder.classes_),
-             }
-         )
-     else:
-         overall_metrics.update(
-             {
-                 "rmse": float(np.sqrt(mean_squared_error(all_actuals, all_predictions))),
-                 "mae": float(mean_absolute_error(all_actuals, all_predictions)),
-                 "r2": float(r2_score(all_actuals, all_predictions)),
-             }
-         )
-     # Calculate summary metrics across folds
-     summary_metrics = {}
-     metrics_to_aggregate = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "r2"]
-
-     for metric in metrics_to_aggregate:
-         values = [fold[metric] for fold in fold_results]
-         summary_metrics[metric] = f"{float(np.mean(values)):.3f} ±{float(np.std(values)):.3f}"
-     # Format fold results as strings (TBD section)
+     # Calculate summary metrics (mean ± std)
+     fold_df = pd.DataFrame(fold_metrics)
+     metric_names = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "medae", "r2", "spearmanr"]
+     summary_metrics = {metric: f"{fold_df[metric].mean():.3f} ±{fold_df[metric].std():.3f}" for metric in metric_names}
+
+     # Format fold results for display
      formatted_folds = {}
-     for fold_data in fold_results:
-         fold_key = f"Fold {fold_data['fold']}"
+     for _, row in fold_df.iterrows():
+         fold_key = f"Fold {int(row['fold'])}"
          if is_classifier:
              formatted_folds[fold_key] = (
-                 f"precision: {fold_data['precision']:.3f} "
-                 f"recall: {fold_data['recall']:.3f} "
-                 f"fscore: {fold_data['fscore']:.3f}"
+                 f"precision: {row['precision']:.3f} " f"recall: {row['recall']:.3f} " f"fscore: {row['fscore']:.3f}"
              )
          else:
              formatted_folds[fold_key] = (
-                 f"rmse: {fold_data['rmse']:.3f} mae: {fold_data['mae']:.3f} r2: {fold_data['r2']:.3f}"
+                 f"rmse: {row['rmse']:.3f} "
+                 f"mae: {row['mae']:.3f} "
+                 f"medae: {row['medae']:.3f} "
+                 f"r2: {row['r2']:.3f} "
+                 f"spearmanr: {row['spearmanr']:.3f}"
              )
-     # Return the results
-     return {
-         "summary_metrics": summary_metrics,
-         # "overall_metrics": overall_metrics,
-         "folds": formatted_folds,
-     }
+
+     # Build return dictionary
+     metrics_dict = {"summary_metrics": summary_metrics, "folds": formatted_folds}
+
+     return metrics_dict, predictions_df
+
+
+ def leave_one_out_inference(workbench_model: Any) -> pd.DataFrame:
+     """
+     Performs leave-one-out cross-validation (parallelized).
+     For datasets > 1000 rows, first identifies top 100 worst predictions via 10-fold CV,
+     then performs true leave-one-out on those 100 samples.
+     Each model trains on ALL data except one sample.
+     """
+     from workbench.api import FeatureSet
+     from joblib import Parallel, delayed
+     from tqdm import tqdm
+
+     def train_and_predict_one(model_params, is_classifier, X, y, train_idx, val_idx):
+         """Train on train_idx, predict on val_idx."""
+         model = xgb.XGBClassifier(**model_params) if is_classifier else xgb.XGBRegressor(**model_params)
+         model.fit(X[train_idx], y[train_idx])
+         return model.predict(X[val_idx])[0]
+
+     # Load model and get params
+     model_artifact_uri = workbench_model.model_data_url()
+     loaded_model = xgboost_model_from_s3(model_artifact_uri)
+     if loaded_model is None:
+         log.error("No XGBoost model found in the artifact.")
+         return pd.DataFrame()
+
+     if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
+         is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
+         model_params = loaded_model.get_params()
+     elif isinstance(loaded_model, xgb.Booster):
+         log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
+         is_classifier = workbench_model.model_type.value == "classifier"
+         model_params = {"enable_categorical": True}
+     else:
+         log.error(f"Unexpected model type: {type(loaded_model)}")
+         return pd.DataFrame()
+
+     # Load and prepare data
+     fs = FeatureSet(workbench_model.get_input())
+     df = workbench_model.training_view().pull_dataframe()
+     id_col = fs.id_column
+     target_col = workbench_model.target()
+     feature_cols = workbench_model.features()
+
+     # Convert string[python] to object, then to category for XGBoost compatibility
+     # This avoids XGBoost's issue with pandas 2.x string[python] dtype in categorical categories
+     for col in feature_cols:
+         if pd.api.types.is_string_dtype(df[col]):
+             # Double conversion: string[python] -> object -> category
+             df[col] = df[col].astype("object").astype("category")
+
+     # Determine which samples to run LOO on
+     if len(df) > 1000:
+         log.important(f"Dataset has {len(df)} rows. Running 10-fold CV to identify top 1000 worst predictions...")
+         _, predictions_df = cross_fold_inference(workbench_model, nfolds=10)
+         predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
+         worst_samples = predictions_df.nlargest(1000, "residual_abs")
+         worst_ids = worst_samples[id_col].values
+         loo_indices = df[df[id_col].isin(worst_ids)].index.values
+         log.important(f"Running leave-one-out CV on 1000 worst samples. Each model trains on {len(df)-1} rows...")
+     else:
+         log.important(f"Running leave-one-out CV on all {len(df)} samples...")
+         loo_indices = df.index.values
+
+     # Prepare full dataset for training
+     X_full = df[feature_cols].values
+     y_full = df[target_col].values
+
+     # Encode target if classifier
+     label_encoder = LabelEncoder() if is_classifier else None
+     if label_encoder:
+         y_full = label_encoder.fit_transform(y_full)
+
+     # Generate LOO splits
+     splits = []
+     for loo_idx in loo_indices:
+         train_idx = np.delete(np.arange(len(X_full)), loo_idx)
+         val_idx = np.array([loo_idx])
+         splits.append((train_idx, val_idx))
+
+     # Parallel execution
+     predictions = Parallel(n_jobs=4)(
+         delayed(train_and_predict_one)(model_params, is_classifier, X_full, y_full, train_idx, val_idx)
+         for train_idx, val_idx in tqdm(splits, desc="LOO CV")
+     )
+
+     # Build results dataframe
+     predictions_array = np.array(predictions)
+     if label_encoder:
+         predictions_array = label_encoder.inverse_transform(predictions_array.astype(int))
+
+     predictions_df = pd.DataFrame(
+         {
+             id_col: df.loc[loo_indices, id_col].values,
+             target_col: df.loc[loo_indices, target_col].values,
+             "prediction": predictions_array,
+         }
+     )
+
+     predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
+
+     return predictions_df


  if __name__ == "__main__":
      """Exercise the Model Utilities"""
-     from workbench.api import Model, FeatureSet
+     from workbench.api import Model
      from pprint import pprint

      # Test the XGBoost model loading and feature importance
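
A sketch (not part of the package) of consuming the new return shapes; the model name comes from the __main__ examples below and assumes a deployed Workbench model:

    from workbench.api import Model
    from workbench.utils.xgboost_model_utils import cross_fold_inference, leave_one_out_inference

    model = Model("abalone-regression")

    # cross_fold_inference() now returns (metrics, out-of-fold predictions) instead of a single dict
    metrics, oof_df = cross_fold_inference(model, nfolds=5)
    print(metrics["summary_metrics"])  # per-metric "mean ±std" strings
    print(oof_df.head())               # columns: <id column>, <target>, "prediction"

    # leave_one_out_inference() returns per-sample predictions plus an absolute-residual column
    loo_df = leave_one_out_inference(model)
    print(loo_df.nlargest(5, "residual_abs"))  # the five worst-predicted samples
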
@@ -383,11 +524,28 @@
      model_artifact_uri = model.model_data_url()
      xgb_model = xgboost_model_from_s3(model_artifact_uri)

+     # Verify enable_categorical is preserved (for debugging/confidence)
+     print(f"Model parameters: {xgb_model.get_params()}")
+     print(f"enable_categorical: {xgb_model.enable_categorical}")
+
      # Test with UQ Model
      uq_model = Model("aqsol-uq")
      _xgb_model = xgboost_model_from_s3(uq_model.model_data_url())

+     print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
+     model = Model("abalone-regression")
+     results, df = cross_fold_inference(model)
+     pprint(results)
+     print(df.head())
+
+     print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
+     model = Model("wine-classification")
+     results, df = cross_fold_inference(model)
+     pprint(results)
+     print(df.head())
+
      # Test XGBoost add_leaf_hash
+     """
      input_df = FeatureSet(model.get_input()).pull_dataframe()
      leaf_df = add_leaf_hash(model, input_df)
      print("DataFrame with Leaf Hash:")
@@ -404,13 +562,4 @@ if __name__ == "__main__":
      stats_df = leaf_stats(leaf_df, target_col)
      print("DataFrame with Leaf Statistics:")
      print(stats_df)
-
-     print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
-     model = Model("abalone-regression")
-     results = cross_fold_inference(model)
-     pprint(results)
-
-     print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
-     model = Model("wine-classification")
-     results = cross_fold_inference(model)
-     pprint(results)
+     """
@@ -39,7 +39,13 @@ class ModelPlot(ComponentInterface):
              # Calculate the distance from the diagonal for each point
              target = model.target()
              df["error"] = abs(df["prediction"] - df[target])
-             return ScatterPlot().update_properties(df, color="error", regression_line=True)[0]
+             return ScatterPlot().update_properties(
+                 df,
+                 color="error",
+                 regression_line=True,
+                 x=target,
+                 y="prediction",
+             )[0]
          else:
              return self.display_text(f"Model Type: {model.model_type}\n\n Awesome Plot Coming Soon!")

@@ -72,7 +72,9 @@ class DashboardStatus(PluginInterface):
              details = "**Redis:** 🔴 Failed to Connect<br>"

          # Fill in the license details
-         details += f"**Redis Server:** {config_info['REDIS_HOST']}:{config_info.get('REDIS_PORT', 6379)}<br>"
+         redis_host = config_info.get("REDIS_HOST", "NOT SET")
+         redis_port = config_info.get("REDIS_PORT", "NOT SET")
+         details += f"**Redis Server:** {redis_host}:{redis_port}<br>"
          details += f"**Workbench S3 Bucket:** {config_info['WORKBENCH_BUCKET']}<br>"
          details += f"**Plugin Path:** {config_info.get('WORKBENCH_PLUGINS', 'unknown')}<br>"
          details += f"**Themes Path:** {config_info.get('WORKBENCH_THEMES', 'unknown')}<br>"
@@ -5,7 +5,7 @@ import dash_bootstrap_components as dbc

  # Workbench Imports
  from workbench.api.compound import Compound
- from workbench.utils.chem_utils import svg_from_smiles
+ from workbench.utils.chem_utils.vis import svg_from_smiles
  from workbench.web_interface.components.plugin_interface import PluginInterface, PluginPage, PluginInputType
  from workbench.utils.theme_manager import ThemeManager
  from workbench.utils.ai_summary import AISummary
@@ -249,8 +249,13 @@ class ModelDetails(PluginInterface):
          if not inference_runs:
              return [], None

-         # Set "auto_inference" as the default, if that doesn't exist, set the first
-         default_inference_run = "auto_inference" if "auto_inference" in inference_runs else inference_runs[0]
+         # Default inference run (full_cross_fold if it exists, then auto_inference, then first)
+         if "full_cross_fold" in inference_runs:
+             default_inference_run = "full_cross_fold"
+         elif "auto_inference" in inference_runs:
+             default_inference_run = "auto_inference"
+         else:
+             default_inference_run = inference_runs[0]

          # Return the options for the dropdown and the selected value
          return inference_runs, default_inference_run
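
The new default amounts to a small preference list; a toy sketch of the same selection order (run names other than the two preferred ones are made up):

    def pick_default(inference_runs):
        # Precedence: full_cross_fold, then auto_inference, then whatever comes first
        for preferred in ("full_cross_fold", "auto_inference"):
            if preferred in inference_runs:
                return preferred
        return inference_runs[0]

    print(pick_default(["auto_inference", "nightly"]))   # auto_inference
    print(pick_default(["nightly", "full_cross_fold"]))  # full_cross_fold
    print(pick_default(["nightly", "ad_hoc"]))           # nightly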