workbench 0.8.161__py3-none-any.whl → 0.8.192__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. workbench/algorithms/dataframe/proximity.py +143 -102
  2. workbench/algorithms/graph/light/proximity_graph.py +2 -1
  3. workbench/api/compound.py +1 -1
  4. workbench/api/endpoint.py +12 -0
  5. workbench/api/feature_set.py +4 -4
  6. workbench/api/meta.py +5 -2
  7. workbench/api/model.py +16 -12
  8. workbench/api/monitor.py +1 -16
  9. workbench/core/artifacts/artifact.py +11 -3
  10. workbench/core/artifacts/data_capture_core.py +355 -0
  11. workbench/core/artifacts/endpoint_core.py +168 -78
  12. workbench/core/artifacts/feature_set_core.py +72 -13
  13. workbench/core/artifacts/model_core.py +50 -15
  14. workbench/core/artifacts/monitor_core.py +33 -248
  15. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  16. workbench/core/cloud_platform/aws/aws_meta.py +12 -5
  17. workbench/core/cloud_platform/aws/aws_session.py +4 -4
  18. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  19. workbench/core/transforms/features_to_model/features_to_model.py +9 -4
  20. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  21. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  22. workbench/core/views/training_view.py +49 -53
  23. workbench/core/views/view.py +51 -1
  24. workbench/core/views/view_utils.py +4 -4
  25. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  26. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  27. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  28. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  29. workbench/model_scripts/custom_models/proximity/proximity.py +143 -102
  30. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  31. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +10 -17
  32. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  33. workbench/model_scripts/custom_models/uq_models/meta_uq.template +156 -58
  34. workbench/model_scripts/custom_models/uq_models/ngboost.template +20 -14
  35. workbench/model_scripts/custom_models/uq_models/proximity.py +143 -102
  36. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  37. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +5 -13
  38. workbench/model_scripts/pytorch_model/pytorch.template +19 -20
  39. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  40. workbench/model_scripts/script_generation.py +7 -2
  41. workbench/model_scripts/uq_models/mapie.template +492 -0
  42. workbench/model_scripts/uq_models/requirements.txt +1 -0
  43. workbench/model_scripts/xgb_model/xgb_model.template +31 -40
  44. workbench/repl/workbench_shell.py +11 -6
  45. workbench/scripts/lambda_launcher.py +63 -0
  46. workbench/scripts/ml_pipeline_batch.py +137 -0
  47. workbench/scripts/ml_pipeline_sqs.py +186 -0
  48. workbench/scripts/monitor_cloud_watch.py +20 -100
  49. workbench/utils/aws_utils.py +4 -3
  50. workbench/utils/chem_utils/__init__.py +0 -0
  51. workbench/utils/chem_utils/fingerprints.py +134 -0
  52. workbench/utils/chem_utils/misc.py +194 -0
  53. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  54. workbench/utils/chem_utils/mol_standardize.py +450 -0
  55. workbench/utils/chem_utils/mol_tagging.py +348 -0
  56. workbench/utils/chem_utils/projections.py +209 -0
  57. workbench/utils/chem_utils/salts.py +256 -0
  58. workbench/utils/chem_utils/sdf.py +292 -0
  59. workbench/utils/chem_utils/toxicity.py +250 -0
  60. workbench/utils/chem_utils/vis.py +253 -0
  61. workbench/utils/cloudwatch_handler.py +1 -1
  62. workbench/utils/cloudwatch_utils.py +137 -0
  63. workbench/utils/config_manager.py +3 -7
  64. workbench/utils/endpoint_utils.py +5 -7
  65. workbench/utils/license_manager.py +2 -6
  66. workbench/utils/model_utils.py +76 -30
  67. workbench/utils/monitor_utils.py +44 -62
  68. workbench/utils/pandas_utils.py +3 -3
  69. workbench/utils/shap_utils.py +10 -2
  70. workbench/utils/workbench_logging.py +0 -3
  71. workbench/utils/workbench_sqs.py +1 -1
  72. workbench/utils/xgboost_model_utils.py +283 -145
  73. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  74. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  75. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  76. {workbench-0.8.161.dist-info → workbench-0.8.192.dist-info}/METADATA +4 -4
  77. {workbench-0.8.161.dist-info → workbench-0.8.192.dist-info}/RECORD +81 -76
  78. {workbench-0.8.161.dist-info → workbench-0.8.192.dist-info}/entry_points.txt +3 -0
  79. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  80. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  81. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  82. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  83. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  84. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  85. workbench/model_scripts/pytorch_model/generated_model_script.py +0 -565
  86. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  87. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  88. workbench/model_scripts/scikit_learn/generated_model_script.py +0 -307
  89. workbench/model_scripts/xgb_model/generated_model_script.py +0 -477
  90. workbench/utils/chem_utils.py +0 -1556
  91. workbench/utils/execution_environment.py +0 -211
  92. workbench/utils/fast_inference.py +0 -167
  93. workbench/utils/resource_utils.py +0 -39
  94. {workbench-0.8.161.dist-info → workbench-0.8.192.dist-info}/WHEEL +0 -0
  95. {workbench-0.8.161.dist-info → workbench-0.8.192.dist-info}/licenses/LICENSE +0 -0
  96. {workbench-0.8.161.dist-info → workbench-0.8.192.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ import logging
4
4
  import os
5
5
  import tempfile
6
6
  import tarfile
7
+ import joblib
7
8
  import pickle
8
9
  import glob
9
10
  import awswrangler as wr
@@ -16,11 +17,12 @@ from typing import Dict, Any
16
17
  from sklearn.model_selection import KFold, StratifiedKFold
17
18
  from sklearn.metrics import (
18
19
  precision_recall_fscore_support,
19
- confusion_matrix,
20
20
  mean_squared_error,
21
21
  mean_absolute_error,
22
22
  r2_score,
23
+ median_absolute_error,
23
24
  )
25
+ from scipy.stats import spearmanr
24
26
  from sklearn.preprocessing import LabelEncoder
25
27
 
26
28
  # Workbench Imports
@@ -34,14 +36,12 @@ log = logging.getLogger("workbench")
34
36
  def xgboost_model_from_s3(model_artifact_uri: str):
35
37
  """
36
38
  Download and extract XGBoost model artifact from S3, then load the model into memory.
37
- Handles both direct XGBoost model files and pickled models.
38
- Ensures categorical feature support is enabled.
39
39
 
40
40
  Args:
41
41
  model_artifact_uri (str): S3 URI of the model artifact.
42
42
 
43
43
  Returns:
44
- Loaded XGBoost model or None if unavailable.
44
+ Loaded XGBoost model (XGBClassifier, XGBRegressor, or Booster) or None if unavailable.
45
45
  """
46
46
 
47
47
  with tempfile.TemporaryDirectory() as tmpdir:
@@ -51,67 +51,91 @@ def xgboost_model_from_s3(model_artifact_uri: str):
51
51
 
52
52
  # Extract tarball
53
53
  with tarfile.open(local_tar_path, "r:gz") as tar:
54
- tar.extractall(path=tmpdir, filter="data")
54
+ # Note: For 3.12+, can use filter="data" argument
55
+ tar.extractall(path=tmpdir)
55
56
 
56
57
  # Define model file patterns to search for (in order of preference)
57
58
  patterns = [
58
- # Direct XGBoost model files
59
- os.path.join(tmpdir, "xgboost-model"),
60
- os.path.join(tmpdir, "model"),
61
- os.path.join(tmpdir, "*.bin"),
59
+ # Joblib models (preferred - preserves everything)
60
+ os.path.join(tmpdir, "*model*.joblib"),
61
+ os.path.join(tmpdir, "xgb*.joblib"),
62
+ os.path.join(tmpdir, "**", "*model*.joblib"),
63
+ os.path.join(tmpdir, "**", "xgb*.joblib"),
64
+ # Pickle models (also preserves everything)
65
+ os.path.join(tmpdir, "*model*.pkl"),
66
+ os.path.join(tmpdir, "xgb*.pkl"),
67
+ os.path.join(tmpdir, "**", "*model*.pkl"),
68
+ os.path.join(tmpdir, "**", "xgb*.pkl"),
69
+ # JSON models (fallback - requires reconstruction)
70
+ os.path.join(tmpdir, "*model*.json"),
71
+ os.path.join(tmpdir, "xgb*.json"),
62
72
  os.path.join(tmpdir, "**", "*model*.json"),
63
- os.path.join(tmpdir, "**", "rmse.json"),
64
- # Pickled models
65
- os.path.join(tmpdir, "*.pkl"),
66
- os.path.join(tmpdir, "**", "*.pkl"),
67
- os.path.join(tmpdir, "*.pickle"),
68
- os.path.join(tmpdir, "**", "*.pickle"),
73
+ os.path.join(tmpdir, "**", "xgb*.json"),
69
74
  ]
70
75
 
71
76
  # Try each pattern
72
77
  for pattern in patterns:
73
- # Use glob to find all matching files
74
78
  for model_path in glob.glob(pattern, recursive=True):
75
- # Determine file type by extension
79
+ # Skip files that are clearly not XGBoost models
80
+ filename = os.path.basename(model_path).lower()
81
+ if any(skip in filename for skip in ["label_encoder", "scaler", "preprocessor", "transformer"]):
82
+ log.debug(f"Skipping non-model file: {model_path}")
83
+ continue
84
+
76
85
  _, ext = os.path.splitext(model_path)
77
86
 
78
87
  try:
79
- if ext.lower() in [".pkl", ".pickle"]:
80
- # Handle pickled models
88
+ if ext == ".joblib":
89
+ model = joblib.load(model_path)
90
+ # Verify it's actually an XGBoost model
91
+ if isinstance(model, (xgb.XGBClassifier, xgb.XGBRegressor, xgb.Booster)):
92
+ log.important(f"Loaded XGBoost model from joblib: {model_path}")
93
+ return model
94
+ else:
95
+ log.debug(f"Skipping non-XGBoost object from {model_path}: {type(model)}")
96
+
97
+ elif ext in [".pkl", ".pickle"]:
81
98
  with open(model_path, "rb") as f:
82
99
  model = pickle.load(f)
83
-
84
- # Handle different model types
85
- if isinstance(model, xgb.Booster):
86
- log.important(f"Loaded XGBoost Booster from pickle: {model_path}")
100
+ # Verify it's actually an XGBoost model
101
+ if isinstance(model, (xgb.XGBClassifier, xgb.XGBRegressor, xgb.Booster)):
102
+ log.important(f"Loaded XGBoost model from pickle: {model_path}")
87
103
  return model
88
- elif hasattr(model, "get_booster"):
89
- log.important(f"Loaded XGBoost model from pipeline: {model_path}")
90
- booster = model.get_booster()
91
- return booster
92
- else:
93
- # Handle direct XGBoost model files
104
+ else:
105
+ log.debug(f"Skipping non-XGBoost object from {model_path}: {type(model)}")
106
+
107
+ elif ext == ".json":
108
+ # JSON files should be XGBoost models by definition
94
109
  booster = xgb.Booster()
95
110
  booster.load_model(model_path)
96
- log.important(f"Loaded XGBoost model directly: {model_path}")
111
+ log.important(f"Loaded XGBoost booster from JSON: {model_path}")
97
112
  return booster
113
+
98
114
  except Exception as e:
99
- log.info(f"Failed to load model from {model_path}: {e}")
100
- continue # Try the next file
115
+ log.debug(f"Failed to load {model_path}: {e}")
116
+ continue
101
117
 
102
- # If no model found
103
118
  log.error("No XGBoost model found in the artifact.")
104
119
  return None
105
120
 
106
121
 
107
- def feature_importance(workbench_model, importance_type: str = "weight") -> Optional[List[Tuple[str, float]]]:
122
+ def feature_importance(workbench_model, importance_type: str = "gain") -> Optional[List[Tuple[str, float]]]:
108
123
  """
109
124
  Get sorted feature importances from a Workbench Model object.
110
125
 
111
126
  Args:
112
127
  workbench_model: Workbench model object
113
- importance_type: Type of feature importance.
114
- Options: 'weight', 'gain', 'cover', 'total_gain', 'total_cover'
128
+ importance_type: Type of feature importance. Options:
129
+ - 'gain' (default): Average improvement in loss/objective when feature is used.
130
+ Best for understanding predictive power of features.
131
+ - 'weight': Number of times a feature appears in trees (split count).
132
+ Useful for understanding model complexity and feature usage frequency.
133
+ - 'cover': Average number of samples affected when feature is used.
134
+ Shows the relative quantity of observations related to this feature.
135
+ - 'total_gain': Total improvement in loss/objective across all splits.
136
+ Similar to 'gain' but not averaged (can be biased toward frequent features).
137
+ - 'total_cover': Total number of samples affected across all splits.
138
+ Similar to 'cover' but not averaged.
115
139
 
116
140
  Returns:
117
141
  List of tuples (feature, importance) sorted by importance value (descending).
@@ -120,7 +144,8 @@ def feature_importance(workbench_model, importance_type: str = "weight") -> Opti
120
144
 
121
145
  Note:
122
146
  XGBoost's get_score() only returns features with non-zero importance.
123
- This function ensures all model features are included in the output.
147
+ This function ensures all model features are included in the output,
148
+ adding zero values for features that weren't used in any tree splits.
124
149
  """
125
150
  model_artifact_uri = workbench_model.model_data_url()
126
151
  xgb_model = xgboost_model_from_s3(model_artifact_uri)
@@ -128,11 +153,18 @@ def feature_importance(workbench_model, importance_type: str = "weight") -> Opti
128
153
  log.error("No XGBoost model found in the artifact.")
129
154
  return None
130
155
 
131
- # Get feature importances (only non-zero features)
132
- importances = xgb_model.get_score(importance_type=importance_type)
156
+ # Check if we got a full sklearn model or just a booster (for backwards compatibility)
157
+ if hasattr(xgb_model, "get_booster"):
158
+ # Full sklearn model - get the booster for feature importance
159
+ booster = xgb_model.get_booster()
160
+ all_features = booster.feature_names
161
+ else:
162
+ # Already a booster (legacy JSON load)
163
+ booster = xgb_model
164
+ all_features = xgb_model.feature_names
133
165
 
134
- # Get all feature names from the model
135
- all_features = xgb_model.feature_names
166
+ # Get feature importances (only non-zero features)
167
+ importances = booster.get_score(importance_type=importance_type)
136
168
 
137
169
  # Create complete importance dict with zeros for missing features
138
170
  complete_importances = {feat: importances.get(feat, 0.0) for feat in all_features}
@@ -229,148 +261,246 @@ def leaf_stats(df: pd.DataFrame, target_col: str) -> pd.DataFrame:
229
261
  return result_df
230
262
 
231
263
 
232
- def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Dict[str, Any]:
264
+ def cross_fold_inference(workbench_model: Any, nfolds: int = 5) -> Tuple[Dict[str, Any], pd.DataFrame]:
233
265
  """
234
266
  Performs K-fold cross-validation with detailed metrics.
235
267
  Args:
236
268
  workbench_model: Workbench model object
237
269
  nfolds: Number of folds for cross-validation (default is 5)
238
270
  Returns:
239
- Dictionary containing:
240
- - folds: Dictionary of formatted strings for each fold
241
- - summary_metrics: Summary metrics across folds
242
- - overall_metrics: Overall metrics for all folds
271
+ Tuple of:
272
+ - Dictionary containing:
273
+ - folds: Dictionary of formatted strings for each fold
274
+ - summary_metrics: Summary metrics across folds
275
+ - DataFrame with columns: id, target, prediction (out-of-fold predictions for all samples)
243
276
  """
244
277
  from workbench.api import FeatureSet
245
278
 
246
279
  # Load model
247
- model_type = workbench_model.model_type.value
248
280
  model_artifact_uri = workbench_model.model_data_url()
249
- loaded_booster = xgboost_model_from_s3(model_artifact_uri)
250
- if loaded_booster is None:
281
+ loaded_model = xgboost_model_from_s3(model_artifact_uri)
282
+ if loaded_model is None:
251
283
  log.error("No XGBoost model found in the artifact.")
252
- return {}
253
- # Create the model wrapper
254
- is_classifier = model_type == "classifier"
255
- xgb_model = (
256
- xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
257
- )
258
- xgb_model._Booster = loaded_booster
284
+ return {}, pd.DataFrame()
285
+
286
+ # Check if we got a full sklearn model or need to create one
287
+ if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
288
+ xgb_model = loaded_model
289
+ is_classifier = isinstance(xgb_model, xgb.XGBClassifier)
290
+ elif isinstance(loaded_model, xgb.Booster):
291
+ # Legacy: got a booster, need to wrap it
292
+ log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
293
+ is_classifier = workbench_model.model_type.value == "classifier"
294
+ xgb_model = (
295
+ xgb.XGBClassifier(enable_categorical=True) if is_classifier else xgb.XGBRegressor(enable_categorical=True)
296
+ )
297
+ xgb_model._Booster = loaded_model
298
+ else:
299
+ log.error(f"Unexpected model type: {type(loaded_model)}")
300
+ return {}, pd.DataFrame()
301
+
259
302
  # Prepare data
260
303
  fs = FeatureSet(workbench_model.get_input())
261
- df = fs.pull_dataframe()
304
+ df = workbench_model.training_view().pull_dataframe()
305
+
306
+ # Get id column - assuming FeatureSet has an id_column attribute or similar
307
+ id_col = fs.id_column
308
+ target_col = workbench_model.target()
262
309
  feature_cols = workbench_model.features()
310
+
263
311
  # Convert string features to categorical
264
312
  for col in feature_cols:
265
313
  if df[col].dtype in ["object", "string"]:
266
314
  df[col] = df[col].astype("category")
267
- # Split X and y
268
- X = df[workbench_model.features()]
269
- y = df[workbench_model.target()]
270
315
 
271
- # Encode target if it's a classification problem
316
+ X = df[feature_cols]
317
+ y = df[target_col]
318
+ ids = df[id_col]
319
+
320
+ # Encode target if classifier
272
321
  label_encoder = LabelEncoder() if is_classifier else None
273
322
  if label_encoder:
274
- y = pd.Series(label_encoder.fit_transform(y), name=workbench_model.target())
323
+ y_encoded = label_encoder.fit_transform(y)
324
+ y_for_cv = pd.Series(y_encoded, index=y.index, name=target_col)
325
+ else:
326
+ y_for_cv = y
327
+
275
328
  # Prepare KFold
276
- kfold = (
277
- StratifiedKFold(n_splits=nfolds, shuffle=True, random_state=42)
278
- if is_classifier
279
- else KFold(n_splits=nfolds, shuffle=True, random_state=42)
280
- )
329
+ kfold = (StratifiedKFold if is_classifier else KFold)(n_splits=nfolds, shuffle=True, random_state=42)
330
+
331
+ # Initialize results collection
332
+ fold_metrics = []
333
+ predictions_df = pd.DataFrame({id_col: ids, target_col: y}) # Keep original values
334
+ # Note: 'prediction' column will be created automatically with correct dtype
281
335
 
282
- fold_results = []
283
- all_predictions = []
284
- all_actuals = []
285
- for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y)):
336
+ # Perform cross-validation
337
+ for fold_idx, (train_idx, val_idx) in enumerate(kfold.split(X, y_for_cv), 1):
286
338
  X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
287
- y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
339
+ y_train, y_val = y_for_cv.iloc[train_idx], y_for_cv.iloc[val_idx]
288
340
 
289
- # Train the model
341
+ # Train and predict
290
342
  xgb_model.fit(X_train, y_train)
291
343
  preds = xgb_model.predict(X_val)
292
- all_predictions.extend(preds)
293
- all_actuals.extend(y_val)
294
344
 
295
- # Calculate metrics for this fold
296
- fold_metrics = {"fold": fold_idx + 1}
345
+ # Store predictions (decode if classifier)
346
+ val_indices = X_val.index
347
+ if is_classifier:
348
+ predictions_df.loc[val_indices, "prediction"] = label_encoder.inverse_transform(preds.astype(int))
349
+ else:
350
+ predictions_df.loc[val_indices, "prediction"] = preds
297
351
 
352
+ # Calculate fold metrics
298
353
  if is_classifier:
299
- y_val_original = label_encoder.inverse_transform(y_val)
300
- preds_original = label_encoder.inverse_transform(preds.astype(int))
301
- scores = precision_recall_fscore_support(
302
- y_val_original, preds_original, average="weighted", zero_division=0
354
+ y_val_orig = label_encoder.inverse_transform(y_val)
355
+ preds_orig = label_encoder.inverse_transform(preds.astype(int))
356
+ prec, rec, f1, _ = precision_recall_fscore_support(
357
+ y_val_orig, preds_orig, average="weighted", zero_division=0
303
358
  )
304
- fold_metrics.update({"precision": float(scores[0]), "recall": float(scores[1]), "fscore": float(scores[2])})
359
+ fold_metrics.append({"fold": fold_idx, "precision": prec, "recall": rec, "fscore": f1})
305
360
  else:
306
- fold_metrics.update(
361
+ spearman_corr, _ = spearmanr(y_val, preds)
362
+ fold_metrics.append(
307
363
  {
308
- "rmse": float(np.sqrt(mean_squared_error(y_val, preds))),
309
- "mae": float(mean_absolute_error(y_val, preds)),
310
- "r2": float(r2_score(y_val, preds)),
364
+ "fold": fold_idx,
365
+ "rmse": np.sqrt(mean_squared_error(y_val, preds)),
366
+ "mae": mean_absolute_error(y_val, preds),
367
+ "medae": median_absolute_error(y_val, preds),
368
+ "r2": r2_score(y_val, preds),
369
+ "spearmanr": spearman_corr,
311
370
  }
312
371
  )
313
372
 
314
- fold_results.append(fold_metrics)
315
- # Calculate overall metrics
316
- overall_metrics = {}
317
- if is_classifier:
318
- all_actuals_original = label_encoder.inverse_transform(all_actuals)
319
- all_predictions_original = label_encoder.inverse_transform(all_predictions)
320
- scores = precision_recall_fscore_support(
321
- all_actuals_original, all_predictions_original, average="weighted", zero_division=0
322
- )
323
- overall_metrics.update(
324
- {
325
- "precision": float(scores[0]),
326
- "recall": float(scores[1]),
327
- "fscore": float(scores[2]),
328
- "confusion_matrix": confusion_matrix(
329
- all_actuals_original, all_predictions_original, labels=label_encoder.classes_
330
- ).tolist(),
331
- "label_names": list(label_encoder.classes_),
332
- }
333
- )
334
- else:
335
- overall_metrics.update(
336
- {
337
- "rmse": float(np.sqrt(mean_squared_error(all_actuals, all_predictions))),
338
- "mae": float(mean_absolute_error(all_actuals, all_predictions)),
339
- "r2": float(r2_score(all_actuals, all_predictions)),
340
- }
341
- )
342
- # Calculate summary metrics across folds
343
- summary_metrics = {}
344
- metrics_to_aggregate = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "r2"]
345
-
346
- for metric in metrics_to_aggregate:
347
- values = [fold[metric] for fold in fold_results]
348
- summary_metrics[metric] = f"{float(np.mean(values)):.3f} ±{float(np.std(values)):.3f}"
349
- # Format fold results as strings (TBD section)
373
+ # Calculate summary metrics (mean ± std)
374
+ fold_df = pd.DataFrame(fold_metrics)
375
+ metric_names = ["precision", "recall", "fscore"] if is_classifier else ["rmse", "mae", "medae", "r2", "spearmanr"]
376
+ summary_metrics = {metric: f"{fold_df[metric].mean():.3f} ±{fold_df[metric].std():.3f}" for metric in metric_names}
377
+
378
+ # Format fold results for display
350
379
  formatted_folds = {}
351
- for fold_data in fold_results:
352
- fold_key = f"Fold {fold_data['fold']}"
380
+ for _, row in fold_df.iterrows():
381
+ fold_key = f"Fold {int(row['fold'])}"
353
382
  if is_classifier:
354
383
  formatted_folds[fold_key] = (
355
- f"precision: {fold_data['precision']:.3f} "
356
- f"recall: {fold_data['recall']:.3f} "
357
- f"fscore: {fold_data['fscore']:.3f}"
384
+ f"precision: {row['precision']:.3f} " f"recall: {row['recall']:.3f} " f"fscore: {row['fscore']:.3f}"
358
385
  )
359
386
  else:
360
387
  formatted_folds[fold_key] = (
361
- f"rmse: {fold_data['rmse']:.3f} mae: {fold_data['mae']:.3f} r2: {fold_data['r2']:.3f}"
388
+ f"rmse: {row['rmse']:.3f} "
389
+ f"mae: {row['mae']:.3f} "
390
+ f"medae: {row['medae']:.3f} "
391
+ f"r2: {row['r2']:.3f} "
392
+ f"spearmanr: {row['spearmanr']:.3f}"
362
393
  )
363
- # Return the results
364
- return {
365
- "summary_metrics": summary_metrics,
366
- # "overall_metrics": overall_metrics,
367
- "folds": formatted_folds,
368
- }
394
+
395
+ # Build return dictionary
396
+ metrics_dict = {"summary_metrics": summary_metrics, "folds": formatted_folds}
397
+
398
+ return metrics_dict, predictions_df
399
+
400
+
401
+ def leave_one_out_inference(workbench_model: Any) -> pd.DataFrame:
402
+ """
403
+ Performs leave-one-out cross-validation (parallelized).
404
+ For datasets > 1000 rows, first identifies top 100 worst predictions via 10-fold CV,
405
+ then performs true leave-one-out on those 100 samples.
406
+ Each model trains on ALL data except one sample.
407
+ """
408
+ from workbench.api import FeatureSet
409
+ from joblib import Parallel, delayed
410
+ from tqdm import tqdm
411
+
412
+ def train_and_predict_one(model_params, is_classifier, X, y, train_idx, val_idx):
413
+ """Train on train_idx, predict on val_idx."""
414
+ model = xgb.XGBClassifier(**model_params) if is_classifier else xgb.XGBRegressor(**model_params)
415
+ model.fit(X[train_idx], y[train_idx])
416
+ return model.predict(X[val_idx])[0]
417
+
418
+ # Load model and get params
419
+ model_artifact_uri = workbench_model.model_data_url()
420
+ loaded_model = xgboost_model_from_s3(model_artifact_uri)
421
+ if loaded_model is None:
422
+ log.error("No XGBoost model found in the artifact.")
423
+ return pd.DataFrame()
424
+
425
+ if isinstance(loaded_model, (xgb.XGBClassifier, xgb.XGBRegressor)):
426
+ is_classifier = isinstance(loaded_model, xgb.XGBClassifier)
427
+ model_params = loaded_model.get_params()
428
+ elif isinstance(loaded_model, xgb.Booster):
429
+ log.warning("Deprecated: Loaded model is a Booster, wrapping in sklearn model.")
430
+ is_classifier = workbench_model.model_type.value == "classifier"
431
+ model_params = {"enable_categorical": True}
432
+ else:
433
+ log.error(f"Unexpected model type: {type(loaded_model)}")
434
+ return pd.DataFrame()
435
+
436
+ # Load and prepare data
437
+ fs = FeatureSet(workbench_model.get_input())
438
+ df = workbench_model.training_view().pull_dataframe()
439
+ id_col = fs.id_column
440
+ target_col = workbench_model.target()
441
+ feature_cols = workbench_model.features()
442
+
443
+ # Convert string features to categorical
444
+ for col in feature_cols:
445
+ if df[col].dtype in ["object", "string"]:
446
+ df[col] = df[col].astype("category")
447
+
448
+ # Determine which samples to run LOO on
449
+ if len(df) > 1000:
450
+ log.important(f"Dataset has {len(df)} rows. Running 10-fold CV to identify top 1000 worst predictions...")
451
+ _, predictions_df = cross_fold_inference(workbench_model, nfolds=10)
452
+ predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
453
+ worst_samples = predictions_df.nlargest(1000, "residual_abs")
454
+ worst_ids = worst_samples[id_col].values
455
+ loo_indices = df[df[id_col].isin(worst_ids)].index.values
456
+ log.important(f"Running leave-one-out CV on 1000 worst samples. Each model trains on {len(df)-1} rows...")
457
+ else:
458
+ log.important(f"Running leave-one-out CV on all {len(df)} samples...")
459
+ loo_indices = df.index.values
460
+
461
+ # Prepare full dataset for training
462
+ X_full = df[feature_cols].values
463
+ y_full = df[target_col].values
464
+
465
+ # Encode target if classifier
466
+ label_encoder = LabelEncoder() if is_classifier else None
467
+ if label_encoder:
468
+ y_full = label_encoder.fit_transform(y_full)
469
+
470
+ # Generate LOO splits
471
+ splits = []
472
+ for loo_idx in loo_indices:
473
+ train_idx = np.delete(np.arange(len(X_full)), loo_idx)
474
+ val_idx = np.array([loo_idx])
475
+ splits.append((train_idx, val_idx))
476
+
477
+ # Parallel execution
478
+ predictions = Parallel(n_jobs=4)(
479
+ delayed(train_and_predict_one)(model_params, is_classifier, X_full, y_full, train_idx, val_idx)
480
+ for train_idx, val_idx in tqdm(splits, desc="LOO CV")
481
+ )
482
+
483
+ # Build results dataframe
484
+ predictions_array = np.array(predictions)
485
+ if label_encoder:
486
+ predictions_array = label_encoder.inverse_transform(predictions_array.astype(int))
487
+
488
+ predictions_df = pd.DataFrame(
489
+ {
490
+ id_col: df.loc[loo_indices, id_col].values,
491
+ target_col: df.loc[loo_indices, target_col].values,
492
+ "prediction": predictions_array,
493
+ }
494
+ )
495
+
496
+ predictions_df["residual_abs"] = np.abs(predictions_df[target_col] - predictions_df["prediction"])
497
+
498
+ return predictions_df
369
499
 
370
500
 
371
501
  if __name__ == "__main__":
372
502
  """Exercise the Model Utilities"""
373
- from workbench.api import Model, FeatureSet
503
+ from workbench.api import Model
374
504
  from pprint import pprint
375
505
 
376
506
  # Test the XGBoost model loading and feature importance
@@ -383,11 +513,28 @@ if __name__ == "__main__":
383
513
  model_artifact_uri = model.model_data_url()
384
514
  xgb_model = xgboost_model_from_s3(model_artifact_uri)
385
515
 
516
+ # Verify enable_categorical is preserved (for debugging/confidence)
517
+ print(f"Model parameters: {xgb_model.get_params()}")
518
+ print(f"enable_categorical: {xgb_model.enable_categorical}")
519
+
386
520
  # Test with UQ Model
387
521
  uq_model = Model("aqsol-uq")
388
522
  _xgb_model = xgboost_model_from_s3(uq_model.model_data_url())
389
523
 
524
+ print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
525
+ model = Model("abalone-regression")
526
+ results, df = cross_fold_inference(model)
527
+ pprint(results)
528
+ print(df.head())
529
+
530
+ print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
531
+ model = Model("wine-classification")
532
+ results, df = cross_fold_inference(model)
533
+ pprint(results)
534
+ print(df.head())
535
+
390
536
  # Test XGBoost add_leaf_hash
537
+ """
391
538
  input_df = FeatureSet(model.get_input()).pull_dataframe()
392
539
  leaf_df = add_leaf_hash(model, input_df)
393
540
  print("DataFrame with Leaf Hash:")
@@ -404,13 +551,4 @@ if __name__ == "__main__":
404
551
  stats_df = leaf_stats(leaf_df, target_col)
405
552
  print("DataFrame with Leaf Statistics:")
406
553
  print(stats_df)
407
-
408
- print("\n=== CROSS FOLD REGRESSION EXAMPLE ===")
409
- model = Model("abalone-regression")
410
- results = cross_fold_inference(model)
411
- pprint(results)
412
-
413
- print("\n=== CROSS FOLD CLASSIFICATION EXAMPLE ===")
414
- model = Model("wine-classification")
415
- results = cross_fold_inference(model)
416
- pprint(results)
554
+ """
@@ -72,7 +72,9 @@ class DashboardStatus(PluginInterface):
72
72
  details = "**Redis:** 🔴 Failed to Connect<br>"
73
73
 
74
74
  # Fill in the license details
75
- details += f"**Redis Server:** {config_info['REDIS_HOST']}:{config_info.get('REDIS_PORT', 6379)}<br>"
75
+ redis_host = config_info.get("REDIS_HOST", "NOT SET")
76
+ redis_port = config_info.get("REDIS_PORT", "NOT SET")
77
+ details += f"**Redis Server:** {redis_host}:{redis_port}<br>"
76
78
  details += f"**Workbench S3 Bucket:** {config_info['WORKBENCH_BUCKET']}<br>"
77
79
  details += f"**Plugin Path:** {config_info.get('WORKBENCH_PLUGINS', 'unknown')}<br>"
78
80
  details += f"**Themes Path:** {config_info.get('WORKBENCH_THEMES', 'unknown')}<br>"
@@ -5,7 +5,7 @@ import dash_bootstrap_components as dbc
5
5
 
6
6
  # Workbench Imports
7
7
  from workbench.api.compound import Compound
8
- from workbench.utils.chem_utils import svg_from_smiles
8
+ from workbench.utils.chem_utils.vis import svg_from_smiles
9
9
  from workbench.web_interface.components.plugin_interface import PluginInterface, PluginPage, PluginInputType
10
10
  from workbench.utils.theme_manager import ThemeManager
11
11
  from workbench.utils.ai_summary import AISummary
@@ -159,7 +159,7 @@ class ScatterPlot(PluginInterface):
159
159
  self.df = self.df.drop(columns=aws_cols, errors="ignore")
160
160
 
161
161
  # Set hover columns and custom data
162
- self.hover_columns = kwargs.get("hover_columns", self.df.columns.tolist()[:10])
162
+ self.hover_columns = kwargs.get("hover_columns", sorted(self.df.columns.tolist()[:15]))
163
163
  self.suppress_hover_display = kwargs.get("suppress_hover_display", False)
164
164
  self.custom_data = kwargs.get("custom_data", [])
165
165
 
@@ -427,7 +427,7 @@ if __name__ == "__main__":
427
427
 
428
428
  from workbench.api import DFStore
429
429
 
430
- df = DFStore().get("/workbench/models/aqsol-uq/auto_inference")
430
+ df = DFStore().get("/workbench/models/aqsol-uq-100/full_cross_fold_inference")
431
431
 
432
432
  # Run the Unit Test on the Plugin
433
433
  PluginUnitTest(
@@ -436,6 +436,6 @@ if __name__ == "__main__":
436
436
  theme="midnight_blue",
437
437
  x="solubility",
438
438
  y="prediction",
439
- color="residuals_abs",
439
+ color="prediction_std",
440
440
  suppress_hover_display=True,
441
441
  ).run()