workbench-0.8.174-py3-none-any.whl → workbench-0.8.227-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of workbench has been flagged and may warrant closer review.

Files changed (145)
  1. workbench/__init__.py +1 -0
  2. workbench/algorithms/dataframe/__init__.py +1 -2
  3. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  4. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  5. workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
  6. workbench/algorithms/dataframe/projection_2d.py +44 -21
  7. workbench/algorithms/dataframe/proximity.py +259 -305
  8. workbench/algorithms/graph/light/proximity_graph.py +12 -11
  9. workbench/algorithms/models/cleanlab_model.py +382 -0
  10. workbench/algorithms/models/noise_model.py +388 -0
  11. workbench/algorithms/sql/column_stats.py +0 -1
  12. workbench/algorithms/sql/correlations.py +0 -1
  13. workbench/algorithms/sql/descriptive_stats.py +0 -1
  14. workbench/algorithms/sql/outliers.py +3 -3
  15. workbench/api/__init__.py +5 -1
  16. workbench/api/df_store.py +17 -108
  17. workbench/api/endpoint.py +14 -12
  18. workbench/api/feature_set.py +117 -11
  19. workbench/api/meta.py +0 -1
  20. workbench/api/meta_model.py +289 -0
  21. workbench/api/model.py +52 -21
  22. workbench/api/parameter_store.py +3 -52
  23. workbench/cached/cached_meta.py +0 -1
  24. workbench/cached/cached_model.py +49 -11
  25. workbench/core/artifacts/__init__.py +11 -2
  26. workbench/core/artifacts/artifact.py +7 -7
  27. workbench/core/artifacts/data_capture_core.py +8 -1
  28. workbench/core/artifacts/df_store_core.py +114 -0
  29. workbench/core/artifacts/endpoint_core.py +323 -205
  30. workbench/core/artifacts/feature_set_core.py +249 -45
  31. workbench/core/artifacts/model_core.py +133 -101
  32. workbench/core/artifacts/parameter_store_core.py +98 -0
  33. workbench/core/cloud_platform/aws/aws_account_clamp.py +48 -2
  34. workbench/core/cloud_platform/cloud_meta.py +0 -1
  35. workbench/core/pipelines/pipeline_executor.py +1 -1
  36. workbench/core/transforms/features_to_model/features_to_model.py +60 -44
  37. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +43 -10
  38. workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
  39. workbench/core/views/training_view.py +113 -42
  40. workbench/core/views/view.py +53 -3
  41. workbench/core/views/view_utils.py +4 -4
  42. workbench/model_script_utils/model_script_utils.py +339 -0
  43. workbench/model_script_utils/pytorch_utils.py +405 -0
  44. workbench/model_script_utils/uq_harness.py +277 -0
  45. workbench/model_scripts/chemprop/chemprop.template +774 -0
  46. workbench/model_scripts/chemprop/generated_model_script.py +774 -0
  47. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  48. workbench/model_scripts/chemprop/requirements.txt +3 -0
  49. workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
  50. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +18 -7
  51. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +80 -58
  52. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +0 -1
  53. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -2
  54. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  55. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
  56. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  57. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  58. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  59. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  60. workbench/model_scripts/custom_models/uq_models/ngboost.template +15 -16
  61. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  62. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  63. workbench/model_scripts/meta_model/meta_model.template +209 -0
  64. workbench/model_scripts/pytorch_model/generated_model_script.py +443 -499
  65. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  66. workbench/model_scripts/pytorch_model/pytorch.template +440 -496
  67. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  68. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  69. workbench/model_scripts/pytorch_model/uq_harness.py +277 -0
  70. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  71. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  72. workbench/model_scripts/script_generation.py +15 -12
  73. workbench/model_scripts/uq_models/generated_model_script.py +248 -0
  74. workbench/model_scripts/xgb_model/generated_model_script.py +371 -403
  75. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  76. workbench/model_scripts/xgb_model/uq_harness.py +277 -0
  77. workbench/model_scripts/xgb_model/xgb_model.template +367 -399
  78. workbench/repl/workbench_shell.py +18 -14
  79. workbench/resources/open_source_api.key +1 -1
  80. workbench/scripts/endpoint_test.py +162 -0
  81. workbench/scripts/lambda_test.py +73 -0
  82. workbench/scripts/meta_model_sim.py +35 -0
  83. workbench/scripts/ml_pipeline_sqs.py +122 -6
  84. workbench/scripts/training_test.py +85 -0
  85. workbench/themes/dark/custom.css +59 -0
  86. workbench/themes/dark/plotly.json +5 -5
  87. workbench/themes/light/custom.css +153 -40
  88. workbench/themes/light/plotly.json +9 -9
  89. workbench/themes/midnight_blue/custom.css +59 -0
  90. workbench/utils/aws_utils.py +0 -1
  91. workbench/utils/chem_utils/fingerprints.py +87 -46
  92. workbench/utils/chem_utils/mol_descriptors.py +18 -7
  93. workbench/utils/chem_utils/mol_standardize.py +80 -58
  94. workbench/utils/chem_utils/projections.py +16 -6
  95. workbench/utils/chem_utils/vis.py +25 -27
  96. workbench/utils/chemprop_utils.py +141 -0
  97. workbench/utils/config_manager.py +2 -6
  98. workbench/utils/endpoint_utils.py +5 -7
  99. workbench/utils/license_manager.py +2 -6
  100. workbench/utils/markdown_utils.py +57 -0
  101. workbench/utils/meta_model_simulator.py +499 -0
  102. workbench/utils/metrics_utils.py +256 -0
  103. workbench/utils/model_utils.py +274 -87
  104. workbench/utils/pipeline_utils.py +0 -1
  105. workbench/utils/plot_utils.py +159 -34
  106. workbench/utils/pytorch_utils.py +87 -0
  107. workbench/utils/shap_utils.py +11 -57
  108. workbench/utils/theme_manager.py +95 -30
  109. workbench/utils/xgboost_local_crossfold.py +267 -0
  110. workbench/utils/xgboost_model_utils.py +127 -220
  111. workbench/web_interface/components/experiments/outlier_plot.py +0 -1
  112. workbench/web_interface/components/model_plot.py +16 -2
  113. workbench/web_interface/components/plugin_unit_test.py +5 -3
  114. workbench/web_interface/components/plugins/ag_table.py +2 -4
  115. workbench/web_interface/components/plugins/confusion_matrix.py +3 -6
  116. workbench/web_interface/components/plugins/model_details.py +48 -80
  117. workbench/web_interface/components/plugins/scatter_plot.py +192 -92
  118. workbench/web_interface/components/settings_menu.py +184 -0
  119. workbench/web_interface/page_views/main_page.py +0 -1
  120. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/METADATA +31 -17
  121. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/RECORD +125 -111
  122. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/entry_points.txt +4 -0
  123. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/licenses/LICENSE +1 -1
  124. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  125. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  126. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  127. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  128. workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
  129. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  130. workbench/model_scripts/custom_models/uq_models/mapie.template +0 -502
  131. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -386
  132. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
  133. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  134. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  135. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  136. workbench/themes/quartz/base_css.url +0 -1
  137. workbench/themes/quartz/custom.css +0 -117
  138. workbench/themes/quartz/plotly.json +0 -642
  139. workbench/themes/quartz_dark/base_css.url +0 -1
  140. workbench/themes/quartz_dark/custom.css +0 -131
  141. workbench/themes/quartz_dark/plotly.json +0 -642
  142. workbench/utils/fast_inference.py +0 -167
  143. workbench/utils/resource_utils.py +0 -39
  144. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/WHEEL +0 -0
  145. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/endpoint_core.py
@@ -12,16 +12,8 @@ from typing import Union, Optional
 import hashlib
 
 # Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-    confusion_matrix,
-    precision_recall_fscore_support,
-    mean_squared_error,
-)
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import confusion_matrix
+from workbench.utils.metrics_utils import compute_regression_metrics, compute_classification_metrics
 
 # SageMaker Imports
 from sagemaker.serializers import CSVSerializer
@@ -30,13 +22,15 @@ from sagemaker import Predictor
 
 # Workbench Imports
 from workbench.core.artifacts.artifact import Artifact
-from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType
+from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType, ModelFramework
 from workbench.utils.endpoint_metrics import EndpointMetrics
-from workbench.utils.fast_inference import fast_inference
 from workbench.utils.cache import Cache
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.model_utils import uq_metrics
-from workbench.utils.xgboost_model_utils import cross_fold_inference
+from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
+from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
+from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv
+from workbench_bridges.endpoints.fast_inference import fast_inference
 
 
 class EndpointCore(Artifact):
@@ -336,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True
 
-    def auto_inference(self, capture: bool = False) -> pd.DataFrame:
-        """Run inference on the endpoint using FeatureSet data
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""
 
         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
@@ -349,22 +339,40 @@ class EndpointCore(Artifact):
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Now get the FeatureSet and make sure it exists
-        fs = FeatureSetCore(model.get_input())
-        if not fs.exists():
-            self.log.error("No FeatureSet found for this endpoint. Returning empty DataFrame.")
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Grab the evaluation data from the FeatureSet
-        table = fs.view("training").table
-        eval_df = fs.query(f'SELECT * FROM "{table}" where training = FALSE')
-        capture_name = "auto_inference" if capture else None
-        return self.inference(eval_df, capture_name, id_column=fs.id_column)
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")
 
     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference and compute performance metrics with optional capture
+        """Run inference on the Endpoint using the provided DataFrame
 
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
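
Worth noting for callers: auto_inference() no longer takes a capture flag (it always captures its results under the name "auto_inference"), and the new full_inference() scores every row in the model's training view. A minimal usage sketch, assuming an existing endpoint (the endpoint name here is hypothetical):

    from workbench.core.artifacts.endpoint_core import EndpointCore

    end = EndpointCore("abalone-regression-end")  # hypothetical endpoint name
    test_preds = end.auto_inference()  # rows where training == False
    all_preds = end.full_inference()   # every row in the training view
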
@@ -387,7 +395,7 @@ class EndpointCore(Artifact):
         # Grab the model features and target column
         model = ModelCore(self.model_name)
         features = model.features()
-        target_column = model.target()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
 
         # Run predictions on the evaluation data
         prediction_df = self._predict(eval_df, features, drop_error_rows)
@@ -395,65 +403,213 @@ class EndpointCore(Artifact):
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df
 
+        # Normalize targets to handle both string and list formats
+        if isinstance(targets, list):
+            primary_target = targets[0] if targets else None
+        else:
+            primary_target = targets
+
         # Sanity Check that the target column is present
-        if target_column and (target_column not in prediction_df.columns):
-            self.log.important(f"Target Column {target_column} not found in prediction_df!")
+        if primary_target not in prediction_df.columns:
+            self.log.important(f"Target Column {primary_target} not found in prediction_df!")
             self.log.important("In order to compute metrics, the target column must be present!")
-            return prediction_df
+            metrics = pd.DataFrame()
 
         # Compute the standard performance metrics for this model
-        model_type = model.model_type
-        if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-            prediction_df = self.residuals(target_column, prediction_df)
-            metrics = self.regression_metrics(target_column, prediction_df)
-        elif model_type == ModelType.CLASSIFIER:
-            metrics = self.classification_metrics(target_column, prediction_df)
         else:
-            # For other model types, we don't compute metrics
-            self.log.info(f"Model Type: {model_type} doesn't have metrics...")
-            metrics = pd.DataFrame()
+            if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                prediction_df = self.residuals(primary_target, prediction_df)
+                metrics = self.regression_metrics(primary_target, prediction_df)
+            elif model.model_type == ModelType.CLASSIFIER:
+                metrics = self.classification_metrics(primary_target, prediction_df)
+            else:
+                # For other model types, we don't compute metrics
+                self.log.info(f"Model Type: {model.model_type} doesn't have metrics...")
+                metrics = pd.DataFrame()
 
         # Print out the metrics
-        if not metrics.empty:
-            print(f"Performance Metrics for {self.model_name} on {self.name}")
-            print(metrics.head())
+        print(f"Performance Metrics for {self.model_name} on {self.name}")
+        print(metrics.head())
 
-        # Capture the inference results and metrics
-        if capture_name is not None:
-            description = capture_name.replace("_", " ").title()
-            features = model.features()
-            self._capture_inference_results(
-                capture_name, prediction_df, target_column, model_type, metrics, description, features, id_column
-            )
+        # Capture the inference results and metrics
+        if primary_target and capture_name:
+
+            # If we don't have an id_column, we'll pull it from the model's FeatureSet
+            if id_column is None:
+                fs = FeatureSetCore(model.get_input())
+                id_column = fs.id_column
 
-        # For UQ Models we also capture the uncertainty metrics
-        if model_type in [ModelType.UQ_REGRESSOR]:
-            metrics = uq_metrics(prediction_df, target_column)
+            # Normalize targets to a list for iteration
+            target_list = targets if isinstance(targets, list) else [targets]
+            primary_target = target_list[0]
 
-            # Now put into the Parameter Store Model Inference Namespace
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
+            # For single-target models (99% of cases), just save with capture_name
+            # For multi-target models, save each as {prefix}_{target} plus primary as capture_name
+            is_multi_target = len(target_list) > 1
+
+            if is_multi_target:
+                prefix = "auto" if capture_name == "auto_inference" else capture_name
+
+            for target in target_list:
+                # Drop rows with NaN target values for metrics/plots
+                target_df = prediction_df.dropna(subset=[target])
+
+                # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+                pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+                # Compute per-target metrics
+                if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                    target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+                elif model.model_type == ModelType.CLASSIFIER:
+                    target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+                else:
+                    target_metrics = pd.DataFrame()
+
+                if is_multi_target:
+                    # Multi-target: save as {prefix}_{target}
+                    target_capture_name = f"{prefix}_{target}"
+                    description = target_capture_name.replace("_", " ").title()
+                    self._capture_inference_results(
+                        target_capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        description,
+                        features,
+                        id_column,
+                    )
+
+                # Save primary target (or single target) with original capture_name
+                if target == primary_target:
+                    self._capture_inference_results(
+                        capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        capture_name.replace("_", " ").title(),
+                        features,
+                        id_column,
+                    )
+
+            # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+            if "prediction_std" in prediction_df.columns:
+                metrics = uq_metrics(prediction_df, primary_target)
+                self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
 
         # Return the prediction DataFrame
        return prediction_df
 
-    def cross_fold_inference(self, nfolds: int = 5) -> dict:
-        """Run cross-fold inference (only works for XGBoost models)
-
-        Args:
-            nfolds (int): Number of folds to use for cross-fold (default: 5)
+    def cross_fold_inference(self) -> pd.DataFrame:
+        """Pull cross-fold inference training results for this Endpoint's model
 
         Returns:
-            dict: Dictionary with the cross-fold inference results
+            pd.DataFrame: A DataFrame with cross fold predictions
         """
 
         # Grab our model
         model = ModelCore(self.model_name)
 
-        # Compute CrossFold Metrics
-        cross_fold_metrics = cross_fold_inference(model, nfolds=nfolds)
-        if cross_fold_metrics:
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", cross_fold_metrics)
-        return cross_fold_metrics
+        # Compute CrossFold (Metrics and Prediction Dataframe)
+        # For PyTorch and ChemProp, pull pre-computed CV results from training
+        if model.model_framework in [ModelFramework.UNKNOWN, ModelFramework.XGBOOST]:
+            cross_fold_metrics, out_of_fold_df = xgboost_pull_cv(model)
+        elif model.model_framework == ModelFramework.PYTORCH:
+            cross_fold_metrics, out_of_fold_df = pytorch_pull_cv(model)
+        elif model.model_framework == ModelFramework.CHEMPROP:
+            cross_fold_metrics, out_of_fold_df = chemprop_pull_cv(model)
+        else:
+            self.log.error(f"Cross-Fold Inference not supported for Model Framework: {model.model_framework}.")
+            return pd.DataFrame()
+
+        # If the metrics dataframe isn't empty save to the param store
+        if not cross_fold_metrics.empty:
+            # Convert to list of dictionaries
+            metrics = cross_fold_metrics.to_dict(orient="records")
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", metrics)
+
+        # If the out_of_fold_df is empty return it
+        if out_of_fold_df.empty:
+            self.log.warning("No out-of-fold predictions were made. Returning empty DataFrame.")
+            return out_of_fold_df
+
+        # Capture the results
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
+        model_type = model.model_type
+
+        # Get the id_column from the model's FeatureSet
+        fs = FeatureSetCore(model.get_input())
+        id_column = fs.id_column
+
+        # Normalize targets to a list for iteration
+        target_list = targets if isinstance(targets, list) else [targets]
+        primary_target = target_list[0]
+
+        # If we don't have a smiles column, try to merge it from the FeatureSet
+        if "smiles" not in out_of_fold_df.columns:
+            fs_df = fs.query(f'SELECT {fs.id_column}, "smiles" FROM "{fs.athena_table}"')
+            if "smiles" in fs_df.columns:
+                self.log.info("Merging 'smiles' column from FeatureSet into out-of-fold predictions.")
+                out_of_fold_df = out_of_fold_df.merge(fs_df, on=fs.id_column, how="left")
+
+        # Collect UQ columns (q_*, confidence) for additional tracking (used for hashing)
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
+
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
+
+        # For single-target models (99% of cases), just save as "full_cross_fold"
+        # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
+        is_multi_target = len(target_list) > 1
+        for target in target_list:
+            # Drop rows with NaN target values for metrics/plots
+            target_df = out_of_fold_df.dropna(subset=[target])
+
+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+            # Compute per-target metrics
+            if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+            elif model_type == ModelType.CLASSIFIER:
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+            else:
+                target_metrics = pd.DataFrame()
+
+            if is_multi_target:
+                # Multi-target: save as cv_{target}
+                capture_name = f"cv_{target}"
+                description = capture_name.replace("_", " ").title()
+                self._capture_inference_results(
+                    capture_name,
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    description,
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+            # Save primary target (or single target) as "full_cross_fold"
+            if target == primary_target:
+                self._capture_inference_results(
+                    "full_cross_fold",
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    "Full Cross Fold",
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+        return out_of_fold_df
 
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
         """Run inference on the Endpoint using the provided DataFrame
@@ -648,6 +804,10 @@ class EndpointCore(Artifact):
     @staticmethod
     def _hash_dataframe(df: pd.DataFrame, hash_length: int = 8):
         # Internal: Compute a data hash for the dataframe
+        if df.empty:
+            return "--hash--"
+
+        # Sort the dataframe by columns to ensure consistent ordering
         df = df.copy()
         df = df.sort_values(by=sorted(df.columns.tolist()))
         row_hashes = pd.util.hash_pandas_object(df, index=False)
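
The new empty-frame guard aside, the column sort is what makes the hash row-order independent. A small self-contained sketch of that property (plain pandas, mirroring the two lines above):

    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df2 = df1.sample(frac=1, random_state=0)  # same rows, shuffled order

    h1 = pd.util.hash_pandas_object(df1.sort_values(by=sorted(df1.columns.tolist())), index=False)
    h2 = pd.util.hash_pandas_object(df2.sort_values(by=sorted(df2.columns.tolist())), index=False)
    assert h1.tolist() == h2.tolist()  # identical row hashes regardless of incoming order
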
@@ -658,19 +818,19 @@ class EndpointCore(Artifact):
         self,
         capture_name: str,
         pred_results_df: pd.DataFrame,
-        target_column: str,
+        target: str,
         model_type: ModelType,
         metrics: pd.DataFrame,
         description: str,
         features: list,
         id_column: str = None,
     ):
-        """Internal: Capture the inference results and metrics to S3
+        """Internal: Capture the inference results and metrics to S3 for a single target
 
         Args:
             capture_name (str): Name of the inference capture
             pred_results_df (pd.DataFrame): DataFrame with the prediction results
-            target_column (str): Name of the target column
+            target (str): Target column name
             model_type (ModelType): Type of the model (e.g. REGRESSOR, CLASSIFIER)
             metrics (pd.DataFrame): DataFrame with the performance metrics
             description (str): Description of the inference results
@@ -701,28 +861,12 @@ class EndpointCore(Artifact):
         self.log.info(f"Writing metrics to {inference_capture_path}/inference_metrics.csv")
         wr.s3.to_csv(metrics, f"{inference_capture_path}/inference_metrics.csv", index=False)
 
-        # Grab the target column, prediction column, any _proba columns, and the ID column (if present)
-        prediction_col = "prediction" if "prediction" in pred_results_df.columns else "predictions"
-        output_columns = [target_column, prediction_col]
-
-        # Add any _proba columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.endswith("_proba")]
-
-        # Add any quantile columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.startswith("q_") or col.startswith("qr_")]
-
-        # Add the ID column
-        if id_column and id_column in pred_results_df.columns:
-            output_columns.append(id_column)
-
-        # Write the predictions to our S3 Model Inference Folder
-        self.log.info(f"Writing predictions to {inference_capture_path}/inference_predictions.csv")
-        subset_df = pred_results_df[output_columns]
-        wr.s3.to_csv(subset_df, f"{inference_capture_path}/inference_predictions.csv", index=False)
+        # Save the inference predictions for this target
+        self._save_target_inference(inference_capture_path, pred_results_df, target, id_column)
 
         # CLASSIFIER: Write the confusion matrix to our S3 Model Inference Folder
         if model_type == ModelType.CLASSIFIER:
-            conf_mtx = self.generate_confusion_matrix(target_column, pred_results_df)
+            conf_mtx = self.generate_confusion_matrix(target, pred_results_df)
             self.log.info(f"Writing confusion matrix to {inference_capture_path}/inference_cm.csv")
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)
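
The inline column bookkeeping moves into _save_target_inference (defined in the next hunk). The selection logic is simple enough to show standalone; this sketch uses a hypothetical results frame with a hypothetical "logS" target:

    import pandas as pd

    pred_df = pd.DataFrame(columns=["id", "logS", "prediction", "prediction_std", "q_05", "q_95", "smiles"])
    cols = pred_df.columns

    output_columns = ["id", "logS"]  # id_column and target, when present
    output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
    output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
    if "smiles" in cols:
        output_columns.append("smiles")
    # -> ["id", "logS", "prediction", "prediction_std", "q_05", "q_95", "smiles"]
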
@@ -732,47 +876,58 @@ class EndpointCore(Artifact):
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
 
-    def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def _save_target_inference(
+        self,
+        inference_capture_path: str,
+        pred_results_df: pd.DataFrame,
+        target: str,
+        id_column: str = None,
+    ):
+        """Save inference results for a single target.
+
+        Args:
+            inference_capture_path (str): S3 path for inference capture
+            pred_results_df (pd.DataFrame): DataFrame with prediction results
+            target (str): Target column name
+            id_column (str, optional): Name of the ID column
+        """
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
+        output_columns = []
+        if id_column and id_column in cols:
+            output_columns.append(id_column)
+        if target and target in cols:
+            output_columns.append(target)
+
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
+
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
+
+        # Add smiles column if present
+        if "smiles" in cols:
+            output_columns.append("smiles")
+
+        # Write the predictions to S3
+        output_file = f"{inference_capture_path}/inference_predictions.csv"
+        self.log.info(f"Writing predictions to {output_file}")
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)
+
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Compute the metrics
-        try:
-            y_true = prediction_df[target_column]
-            prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-            y_pred = prediction_df[prediction_col]
-
-            mae = mean_absolute_error(y_true, y_pred)
-            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
-            r2 = r2_score(y_true, y_pred)
-            # Mean Absolute Percentage Error
-            mape = np.mean(np.where(y_true != 0, np.abs((y_true - y_pred) / y_true), np.abs(y_true - y_pred))) * 100
-            # Median Absolute Error
-            medae = median_absolute_error(y_true, y_pred)
-
-            # Organize and return the metrics
-            metrics = {
-                "MAE": round(mae, 3),
-                "RMSE": round(rmse, 3),
-                "R2": round(r2, 3),
-                "MAPE": round(mape, 3),
-                "MedAE": round(medae, 3),
-                "NumRows": len(prediction_df),
-            }
-            return pd.DataFrame.from_records([metrics])
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)
 
     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Add the residuals to the prediction DataFrame
@@ -782,11 +937,13 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found. Cannot compute residuals.")
+            return prediction_df
 
-        # Compute the residuals
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Check for classification scenario
         if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
@@ -800,85 +957,22 @@ class EndpointCore(Artifact):
 
         return prediction_df
 
-    @staticmethod
-    def validate_proba_columns(prediction_df: pd.DataFrame, class_labels: list, guessing: bool = False):
-        """Ensure probability columns are correctly aligned with class labels
-
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
 
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
 
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        # Get the class labels from the model
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Calculate precision, recall, fscore, and support, handling zero division
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        scores = precision_recall_fscore_support(
-            prediction_df[target_column],
-            prediction_df[prediction_col],
-            average=None,
-            labels=class_labels,
-            zero_division=0,
-        )
-
-        # Identify the probability columns and keep them as a Pandas DataFrame
-        proba_columns = [f"{label}_proba" for label in class_labels]
-        y_score = prediction_df[proba_columns]
-
-        # One-hot encode the true labels using all class labels (fit with class_labels)
-        encoder = OneHotEncoder(categories=[class_labels], sparse_output=False)
-        y_true = encoder.fit_transform(prediction_df[[target_column]])
-
-        # Calculate ROC AUC per label and handle exceptions for missing classes
-        roc_auc_per_label = []
-        for i, label in enumerate(class_labels):
-            try:
-                roc_auc = roc_auc_score(y_true[:, i], y_score.iloc[:, i])
-            except ValueError as e:
-                self.log.warning(f"ROC AUC calculation failed for label {label}.")
-                self.log.warning(f"{str(e)}")
-                roc_auc = 0.0
-            roc_auc_per_label.append(roc_auc)
-
-        # Put the scores into a DataFrame
-        score_df = pd.DataFrame(
-            {
-                target_column: class_labels,
-                "precision": scores[0],
-                "recall": scores[1],
-                "fscore": scores[2],
-                "roc_auc": roc_auc_per_label,
-                "support": scores[3],
-            }
-        )
-        return score_df
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)
 
     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
@@ -890,10 +984,20 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found in DataFrame")
+            return pd.DataFrame()
+
+        # Drop rows with NaN predictions (can't include in confusion matrix)
+        nan_mask = prediction_df["prediction"].isna()
+        if nan_mask.any():
+            n_nan = nan_mask.sum()
+            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
+            prediction_df = prediction_df[~nan_mask].copy()
 
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Get model class labels
         model_class_labels = ModelCore(self.model_name).class_labels()
@@ -935,9 +1039,9 @@ class EndpointCore(Artifact):
         self.upsert_workbench_meta({"workbench_input": input})
 
     def delete(self):
-        """ "Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
+        """Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
         if not self.exists():
-            self.log.warning(f"Trying to delete an Model that doesn't exist: {self.name}")
+            self.log.warning(f"Trying to delete an Endpoint that doesn't exist: {self.name}")
 
         # Remove this endpoint from the list of registered endpoints
         self.log.info(f"Removing {self.name} from the list of registered endpoints...")
@@ -981,7 +1085,7 @@ class EndpointCore(Artifact):
         # Recursively delete all endpoint S3 artifacts (inference, etc)
         # Note: We do not want to delete the data_capture/ files since these
         # might be used for collection and data drift analysis
-        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}"
+        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}/"
        all_s3_objects = wr.s3.list_objects(base_endpoint_path, boto3_session=cls.boto3_session)
 
        # Filter out objects that contain 'data_capture/' in their path
@@ -1045,7 +1149,7 @@ class EndpointCore(Artifact):
 if __name__ == "__main__":
     """Exercise the Endpoint Class"""
     from workbench.api import FeatureSet
-    from workbench.utils.endpoint_utils import fs_evaluation_data
+    from workbench.utils.endpoint_utils import get_evaluation_data
     import random
 
     # Grab an EndpointCore object and pull some information from it
@@ -1053,7 +1157,7 @@ if __name__ == "__main__":
 
     # Test various error conditions (set row 42 length to pd.NA)
     # Note: This test should return ALL rows
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     my_eval_df.at[42, "length"] = pd.NA
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1061,6 +1165,9 @@ if __name__ == "__main__":
     assert len(pred_results) == len(my_eval_df), "Predictions should match the number of sent rows"
 
     # Now we put in an invalid value
+    print("*" * 80)
+    print("NOW TESTING ERROR CONDITIONS...")
+    print("*" * 80)
     my_eval_df.at[42, "length"] = "invalid_value"
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1121,16 +1228,21 @@ if __name__ == "__main__":
     # Run Inference where we provide the data
     # Note: This dataframe could be from a FeatureSet or any other source
     print("Running Inference...")
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df)
 
     # Now set capture=True to save inference results and metrics
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df, capture_name="holdout_xyz")
 
     # Run predictions using the fast_inference method
     fast_results = my_endpoint.fast_inference(my_eval_df)
 
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = my_endpoint.cross_fold_inference()
+    print(all_results)
+
     # Run Inference and metrics for a Classification Endpoint
     class_endpoint = EndpointCore("wine-classification")
     auto_predictions = class_endpoint.auto_inference()
@@ -1139,6 +1251,12 @@ if __name__ == "__main__":
     target = "wine_class"
     print(class_endpoint.generate_confusion_matrix(target, auto_predictions))
 
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = class_endpoint.cross_fold_inference()
+    print(all_results)
+    print("All done...")
+
     # Test the class method delete (commented out for now)
     # from workbench.api import Model
     # model = Model("abalone-regression")