workbench 0.8.162__py3-none-any.whl → 0.8.220__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (147)
  1. workbench/algorithms/dataframe/__init__.py +1 -2
  2. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  3. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  4. workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
  5. workbench/algorithms/dataframe/projection_2d.py +44 -21
  6. workbench/algorithms/dataframe/proximity.py +259 -305
  7. workbench/algorithms/graph/light/proximity_graph.py +14 -12
  8. workbench/algorithms/models/cleanlab_model.py +382 -0
  9. workbench/algorithms/models/noise_model.py +388 -0
  10. workbench/algorithms/sql/outliers.py +3 -3
  11. workbench/api/__init__.py +5 -1
  12. workbench/api/compound.py +1 -1
  13. workbench/api/df_store.py +17 -108
  14. workbench/api/endpoint.py +18 -5
  15. workbench/api/feature_set.py +121 -15
  16. workbench/api/meta.py +5 -2
  17. workbench/api/meta_model.py +289 -0
  18. workbench/api/model.py +55 -21
  19. workbench/api/monitor.py +1 -16
  20. workbench/api/parameter_store.py +3 -52
  21. workbench/cached/cached_model.py +4 -4
  22. workbench/core/artifacts/__init__.py +11 -2
  23. workbench/core/artifacts/artifact.py +16 -8
  24. workbench/core/artifacts/data_capture_core.py +355 -0
  25. workbench/core/artifacts/df_store_core.py +114 -0
  26. workbench/core/artifacts/endpoint_core.py +382 -253
  27. workbench/core/artifacts/feature_set_core.py +249 -45
  28. workbench/core/artifacts/model_core.py +135 -80
  29. workbench/core/artifacts/monitor_core.py +33 -248
  30. workbench/core/artifacts/parameter_store_core.py +98 -0
  31. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  32. workbench/core/cloud_platform/aws/aws_meta.py +12 -5
  33. workbench/core/cloud_platform/aws/aws_session.py +4 -4
  34. workbench/core/pipelines/pipeline_executor.py +1 -1
  35. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  36. workbench/core/transforms/features_to_model/features_to_model.py +62 -40
  37. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +76 -15
  38. workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
  39. workbench/core/views/training_view.py +113 -42
  40. workbench/core/views/view.py +53 -3
  41. workbench/core/views/view_utils.py +4 -4
  42. workbench/model_script_utils/model_script_utils.py +339 -0
  43. workbench/model_script_utils/pytorch_utils.py +405 -0
  44. workbench/model_script_utils/uq_harness.py +278 -0
  45. workbench/model_scripts/chemprop/chemprop.template +649 -0
  46. workbench/model_scripts/chemprop/generated_model_script.py +649 -0
  47. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  48. workbench/model_scripts/chemprop/requirements.txt +3 -0
  49. workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
  50. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  51. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  52. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  53. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  54. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  55. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
  56. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  57. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  58. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  59. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  60. workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
  61. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  62. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  63. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  64. workbench/model_scripts/meta_model/meta_model.template +209 -0
  65. workbench/model_scripts/pytorch_model/generated_model_script.py +444 -500
  66. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  67. workbench/model_scripts/pytorch_model/pytorch.template +440 -496
  68. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  69. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  70. workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
  71. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  72. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  73. workbench/model_scripts/script_generation.py +20 -11
  74. workbench/model_scripts/uq_models/generated_model_script.py +248 -0
  75. workbench/model_scripts/xgb_model/generated_model_script.py +372 -404
  76. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  77. workbench/model_scripts/xgb_model/uq_harness.py +278 -0
  78. workbench/model_scripts/xgb_model/xgb_model.template +369 -401
  79. workbench/repl/workbench_shell.py +28 -19
  80. workbench/resources/open_source_api.key +1 -1
  81. workbench/scripts/endpoint_test.py +162 -0
  82. workbench/scripts/lambda_test.py +73 -0
  83. workbench/scripts/meta_model_sim.py +35 -0
  84. workbench/scripts/ml_pipeline_batch.py +137 -0
  85. workbench/scripts/ml_pipeline_sqs.py +186 -0
  86. workbench/scripts/monitor_cloud_watch.py +20 -100
  87. workbench/scripts/training_test.py +85 -0
  88. workbench/utils/aws_utils.py +4 -3
  89. workbench/utils/chem_utils/__init__.py +0 -0
  90. workbench/utils/chem_utils/fingerprints.py +175 -0
  91. workbench/utils/chem_utils/misc.py +194 -0
  92. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  93. workbench/utils/chem_utils/mol_standardize.py +450 -0
  94. workbench/utils/chem_utils/mol_tagging.py +348 -0
  95. workbench/utils/chem_utils/projections.py +219 -0
  96. workbench/utils/chem_utils/salts.py +256 -0
  97. workbench/utils/chem_utils/sdf.py +292 -0
  98. workbench/utils/chem_utils/toxicity.py +250 -0
  99. workbench/utils/chem_utils/vis.py +253 -0
  100. workbench/utils/chemprop_utils.py +141 -0
  101. workbench/utils/cloudwatch_handler.py +1 -1
  102. workbench/utils/cloudwatch_utils.py +137 -0
  103. workbench/utils/config_manager.py +3 -7
  104. workbench/utils/endpoint_utils.py +5 -7
  105. workbench/utils/license_manager.py +2 -6
  106. workbench/utils/meta_model_simulator.py +499 -0
  107. workbench/utils/metrics_utils.py +256 -0
  108. workbench/utils/model_utils.py +278 -79
  109. workbench/utils/monitor_utils.py +44 -62
  110. workbench/utils/pandas_utils.py +3 -3
  111. workbench/utils/pytorch_utils.py +87 -0
  112. workbench/utils/shap_utils.py +11 -57
  113. workbench/utils/workbench_logging.py +0 -3
  114. workbench/utils/workbench_sqs.py +1 -1
  115. workbench/utils/xgboost_local_crossfold.py +267 -0
  116. workbench/utils/xgboost_model_utils.py +127 -219
  117. workbench/web_interface/components/model_plot.py +14 -2
  118. workbench/web_interface/components/plugin_unit_test.py +5 -2
  119. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  120. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  121. workbench/web_interface/components/plugins/model_details.py +38 -74
  122. workbench/web_interface/components/plugins/scatter_plot.py +6 -10
  123. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/METADATA +31 -9
  124. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/RECORD +128 -96
  125. workbench-0.8.220.dist-info/entry_points.txt +11 -0
  126. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +1 -1
  127. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  128. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  129. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  130. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  131. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  132. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  133. workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
  134. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  135. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  136. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -273
  137. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
  138. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  139. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  140. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  141. workbench/utils/chem_utils.py +0 -1556
  142. workbench/utils/execution_environment.py +0 -211
  143. workbench/utils/fast_inference.py +0 -167
  144. workbench/utils/resource_utils.py +0 -39
  145. workbench-0.8.162.dist-info/entry_points.txt +0 -5
  146. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
  147. {workbench-0.8.162.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
@@ -12,16 +12,8 @@ from typing import Union, Optional
 import hashlib

 # Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-    confusion_matrix,
-    precision_recall_fscore_support,
-    mean_squared_error,
-)
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import confusion_matrix
+from workbench.utils.metrics_utils import compute_regression_metrics, compute_classification_metrics

 # SageMaker Imports
 from sagemaker.serializers import CSVSerializer
@@ -30,13 +22,15 @@ from sagemaker import Predictor

 # Workbench Imports
 from workbench.core.artifacts.artifact import Artifact
-from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType
+from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType, ModelFramework
 from workbench.utils.endpoint_metrics import EndpointMetrics
-from workbench.utils.fast_inference import fast_inference
 from workbench.utils.cache import Cache
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.model_utils import uq_metrics
-from workbench.utils.xgboost_model_utils import cross_fold_inference
+from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
+from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
+from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv
+from workbench_bridges.endpoints.fast_inference import fast_inference


 class EndpointCore(Artifact):
@@ -164,11 +158,17 @@ class EndpointCore(Artifact):
         """
         return "Serverless" in self.endpoint_meta["InstanceType"]

-    def add_data_capture(self):
+    def data_capture(self):
+        """Get the DataCaptureCore class for this endpoint"""
+        from workbench.core.artifacts.data_capture_core import DataCaptureCore
+
+        return DataCaptureCore(self.endpoint_name)
+
+    def enable_data_capture(self):
         """Add data capture to the endpoint"""
-        self.get_monitor().add_data_capture()
+        self.data_capture().enable()

-    def get_monitor(self):
+    def monitor(self):
         """Get the MonitorCore class for this endpoint"""
         from workbench.core.artifacts.monitor_core import MonitorCore
@@ -330,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True

-    def auto_inference(self, capture: bool = False) -> pd.DataFrame:
-        """Run inference on the endpoint using FeatureSet data
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""

         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
@@ -343,22 +339,40 @@ class EndpointCore(Artifact):
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()

-        # Now get the FeatureSet and make sure it exists
-        fs = FeatureSetCore(model.get_input())
-        if not fs.exists():
-            self.log.error("No FeatureSet found for this endpoint. Returning empty DataFrame.")
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()

-        # Grab the evaluation data from the FeatureSet
-        table = fs.view("training").table
-        eval_df = fs.query(f'SELECT * FROM "{table}" where training = FALSE')
-        capture_name = "auto_inference" if capture else None
-        return self.inference(eval_df, capture_name, id_column=fs.id_column)
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")

     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference and compute performance metrics with optional capture
+        """Run inference on the Endpoint using the provided DataFrame

         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
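
auto_inference() drops its capture flag: results are now always captured under the name "auto_inference", the evaluation rows come from the model's training view (training == False), and the new full_inference() scores every row in that view. A migration sketch, reusing the endpoint object from the sketch above:

    # 0.8.162: my_endpoint.auto_inference(capture=True)
    # 0.8.220: capture always happens, under the name "auto_inference"
    holdout_preds = end.auto_inference()  # rows where training == False
    all_preds = end.full_inference()      # every row in the training view
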
@@ -378,63 +392,219 @@ class EndpointCore(Artifact):
             self.log.important("No model associated with this endpoint, running 'no frills' inference...")
             return self.fast_inference(eval_df)

+        # Grab the model features and target column
+        model = ModelCore(self.model_name)
+        features = model.features()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
+
         # Run predictions on the evaluation data
-        prediction_df = self._predict(eval_df, drop_error_rows)
+        prediction_df = self._predict(eval_df, features, drop_error_rows)
         if prediction_df.empty:
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df

-        # Get the target column
-        model = ModelCore(self.model_name)
-        target_column = model.target()
+        # Normalize targets to handle both string and list formats
+        if isinstance(targets, list):
+            primary_target = targets[0] if targets else None
+        else:
+            primary_target = targets

         # Sanity Check that the target column is present
-        if target_column and (target_column not in prediction_df.columns):
-            self.log.important(f"Target Column {target_column} not found in prediction_df!")
+        if primary_target not in prediction_df.columns:
+            self.log.important(f"Target Column {primary_target} not found in prediction_df!")
             self.log.important("In order to compute metrics, the target column must be present!")
-            return prediction_df
+            metrics = pd.DataFrame()

         # Compute the standard performance metrics for this model
-        model_type = model.model_type
-        if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-            prediction_df = self.residuals(target_column, prediction_df)
-            metrics = self.regression_metrics(target_column, prediction_df)
-        elif model_type == ModelType.CLASSIFIER:
-            metrics = self.classification_metrics(target_column, prediction_df)
         else:
-            # For other model types, we don't compute metrics
-            self.log.info(f"Model Type: {model_type} doesn't have metrics...")
-            metrics = pd.DataFrame()
+            if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                prediction_df = self.residuals(primary_target, prediction_df)
+                metrics = self.regression_metrics(primary_target, prediction_df)
+            elif model.model_type == ModelType.CLASSIFIER:
+                metrics = self.classification_metrics(primary_target, prediction_df)
+            else:
+                # For other model types, we don't compute metrics
+                self.log.info(f"Model Type: {model.model_type} doesn't have metrics...")
+                metrics = pd.DataFrame()

         # Print out the metrics
-        if not metrics.empty:
-            print(f"Performance Metrics for {self.model_name} on {self.name}")
-            print(metrics.head())
+        print(f"Performance Metrics for {self.model_name} on {self.name}")
+        print(metrics.head())

-        # Capture the inference results and metrics
-        if capture_name is not None:
-            description = capture_name.replace("_", " ").title()
-            features = model.features()
-            self._capture_inference_results(
-                capture_name, prediction_df, target_column, model_type, metrics, description, features, id_column
-            )
+        # Capture the inference results and metrics
+        if primary_target and capture_name:
+
+            # If we don't have an id_column, we'll pull it from the model's FeatureSet
+            if id_column is None:
+                fs = FeatureSetCore(model.get_input())
+                id_column = fs.id_column
+
+            # Normalize targets to a list for iteration
+            target_list = targets if isinstance(targets, list) else [targets]
+            primary_target = target_list[0]

-        # Capture CrossFold Inference Results
-        cross_fold_metrics = cross_fold_inference(model)
-        if cross_fold_metrics:
-            # Now put into the Parameter Store Model Inference Namespace
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", cross_fold_metrics)
+            # For single-target models (99% of cases), just save with capture_name
+            # For multi-target models, save each as {prefix}_{target} plus primary as capture_name
+            is_multi_target = len(target_list) > 1

-        # For UQ Models we also capture the uncertainty metrics
-        if model_type in [ModelType.UQ_REGRESSOR]:
-            metrics = uq_metrics(prediction_df, target_column)
+            if is_multi_target:
+                prefix = "auto" if capture_name == "auto_inference" else capture_name

-            # Now put into the Parameter Store Model Inference Namespace
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
+            for target in target_list:
+                # Drop rows with NaN target values for metrics/plots
+                target_df = prediction_df.dropna(subset=[target])
+
+                # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+                pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+                # Compute per-target metrics
+                if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                    target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+                elif model.model_type == ModelType.CLASSIFIER:
+                    target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+                else:
+                    target_metrics = pd.DataFrame()
+
+                if is_multi_target:
+                    # Multi-target: save as {prefix}_{target}
+                    target_capture_name = f"{prefix}_{target}"
+                    description = target_capture_name.replace("_", " ").title()
+                    self._capture_inference_results(
+                        target_capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        description,
+                        features,
+                        id_column,
+                    )
+
+                # Save primary target (or single target) with original capture_name
+                if target == primary_target:
+                    self._capture_inference_results(
+                        capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        capture_name.replace("_", " ").title(),
+                        features,
+                        id_column,
+                    )
+
+            # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+            if "prediction_std" in prediction_df.columns:
+                metrics = uq_metrics(prediction_df, primary_target)
+                self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)

         # Return the prediction DataFrame
         return prediction_df

+    def cross_fold_inference(self) -> pd.DataFrame:
+        """Pull cross-fold inference training results for this Endpoint's model
+
+        Returns:
+            pd.DataFrame: A DataFrame with cross fold predictions
+        """
+
+        # Grab our model
+        model = ModelCore(self.model_name)
+
+        # Compute CrossFold (Metrics and Prediction Dataframe)
+        # For PyTorch and ChemProp, pull pre-computed CV results from training
+        if model.model_framework in [ModelFramework.UNKNOWN, ModelFramework.XGBOOST]:
+            cross_fold_metrics, out_of_fold_df = xgboost_pull_cv(model)
+        elif model.model_framework == ModelFramework.PYTORCH:
+            cross_fold_metrics, out_of_fold_df = pytorch_pull_cv(model)
+        elif model.model_framework == ModelFramework.CHEMPROP:
+            cross_fold_metrics, out_of_fold_df = chemprop_pull_cv(model)
+        else:
+            self.log.error(f"Cross-Fold Inference not supported for Model Framework: {model.model_framework}.")
+            return pd.DataFrame()
+
+        # If the metrics dataframe isn't empty save to the param store
+        if not cross_fold_metrics.empty:
+            # Convert to list of dictionaries
+            metrics = cross_fold_metrics.to_dict(orient="records")
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", metrics)
+
+        # If the out_of_fold_df is empty return it
+        if out_of_fold_df.empty:
+            self.log.warning("No out-of-fold predictions were made. Returning empty DataFrame.")
+            return out_of_fold_df
+
+        # Capture the results
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
+        model_type = model.model_type
+
+        # Get the id_column from the model's FeatureSet
+        fs = FeatureSetCore(model.get_input())
+        id_column = fs.id_column
+
+        # Normalize targets to a list for iteration
+        target_list = targets if isinstance(targets, list) else [targets]
+        primary_target = target_list[0]
+
+        # Collect UQ columns (q_*, confidence) for additional tracking
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
+
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
+
+        # For single-target models (99% of cases), just save as "full_cross_fold"
+        # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
+        is_multi_target = len(target_list) > 1
+
+        for target in target_list:
+            # Drop rows with NaN target values for metrics/plots
+            target_df = out_of_fold_df.dropna(subset=[target])
+
+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+            # Compute per-target metrics
+            if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+            elif model_type == ModelType.CLASSIFIER:
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+            else:
+                target_metrics = pd.DataFrame()
+
+            if is_multi_target:
+                # Multi-target: save as cv_{target}
+                capture_name = f"cv_{target}"
+                description = capture_name.replace("_", " ").title()
+                self._capture_inference_results(
+                    capture_name,
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    description,
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+            # Save primary target (or single target) as "full_cross_fold"
+            if target == primary_target:
+                self._capture_inference_results(
+                    "full_cross_fold",
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    "Full Cross Fold",
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+        return out_of_fold_df
+
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
         """Run inference on the Endpoint using the provided DataFrame
@@ -450,11 +620,12 @@ class EndpointCore(Artifact):
         """
         return fast_inference(self.name, eval_df, self.sm_session, threads=threads)

-    def _predict(self, eval_df: pd.DataFrame, drop_error_rows: bool = False) -> pd.DataFrame:
-        """Internal: Run prediction on the given observations in the given DataFrame
+    def _predict(self, eval_df: pd.DataFrame, features: list[str], drop_error_rows: bool = False) -> pd.DataFrame:
+        """Internal: Run prediction on observations in the given DataFrame

         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
+            features (list[str]): List of feature column names needed for prediction
             drop_error_rows (bool): If True, drop rows that had endpoint errors/issues (default=False)
         Returns:
             pd.DataFrame: Return the DataFrame with additional columns, prediction and any _proba columns
@@ -465,19 +636,12 @@ class EndpointCore(Artifact):
             self.log.warning("Evaluation DataFrame has 0 rows. No predictions to run.")
             return pd.DataFrame(columns=eval_df.columns)  # Return empty DataFrame with same structure

-        # Sanity check: Does the Model have Features?
-        features = ModelCore(self.model_name).features()
-        if not features:
-            self.log.warning("Model does not have features defined, using all columns in the DataFrame")
-        else:
-            # Sanity check: Does the DataFrame have the required features?
-            df_columns_lower = set(col.lower() for col in eval_df.columns)
-            features_lower = set(feature.lower() for feature in features)
-
-            # Check if the features are a subset of the DataFrame columns (case-insensitive)
-            if not features_lower.issubset(df_columns_lower):
-                missing_features = features_lower - df_columns_lower
-                raise ValueError(f"DataFrame does not contain required features: {missing_features}")
+        # Sanity check: Does the DataFrame have the required features?
+        df_columns_lower = set(col.lower() for col in eval_df.columns)
+        features_lower = set(feature.lower() for feature in features)
+        if not features_lower.issubset(df_columns_lower):
+            missing_features = features_lower - df_columns_lower
+            raise ValueError(f"DataFrame does not contain required features: {missing_features}")

         # Create our Endpoint Predictor Class
         predictor = Predictor(
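
_predict() now receives the feature list from inference() and always enforces a case-insensitive subset check, raising ValueError instead of silently falling back to all columns. A standalone sketch of the check, with hypothetical column names:

    import pandas as pd

    features = ["Length", "Diameter", "Height"]             # from model.features()
    eval_df = pd.DataFrame(columns=["length", "diameter"])  # missing "height"

    df_columns_lower = {col.lower() for col in eval_df.columns}
    features_lower = {feature.lower() for feature in features}
    if not features_lower.issubset(df_columns_lower):
        missing_features = features_lower - df_columns_lower
        raise ValueError(f"DataFrame does not contain required features: {missing_features}")
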
@@ -634,6 +798,10 @@ class EndpointCore(Artifact):
     @staticmethod
     def _hash_dataframe(df: pd.DataFrame, hash_length: int = 8):
         # Internal: Compute a data hash for the dataframe
+        if df.empty:
+            return "--hash--"
+
+        # Sort the dataframe by columns to ensure consistent ordering
         df = df.copy()
         df = df.sort_values(by=sorted(df.columns.tolist()))
         row_hashes = pd.util.hash_pandas_object(df, index=False)
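
_hash_dataframe() now short-circuits on empty frames and sorts rows before hashing, so the digest is independent of row order. A self-contained sketch of the idea; the final combine step is an assumption, since the diff cuts off after the row-hash line:

    import hashlib
    import pandas as pd

    def hash_dataframe(df: pd.DataFrame, hash_length: int = 8) -> str:
        if df.empty:
            return "--hash--"
        # Sort rows by column values so row order doesn't change the hash
        df = df.copy()
        df = df.sort_values(by=sorted(df.columns.tolist()))
        row_hashes = pd.util.hash_pandas_object(df, index=False)
        # Combine the per-row hashes into one short digest (assumed combine step)
        return hashlib.md5(row_hashes.values.tobytes()).hexdigest()[:hash_length]

    df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    df2 = df1.iloc[::-1]  # same rows, reversed order
    assert hash_dataframe(df1) == hash_dataframe(df2)
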
@@ -644,19 +812,19 @@ class EndpointCore(Artifact):
         self,
         capture_name: str,
         pred_results_df: pd.DataFrame,
-        target_column: str,
+        target: str,
         model_type: ModelType,
         metrics: pd.DataFrame,
         description: str,
         features: list,
         id_column: str = None,
     ):
-        """Internal: Capture the inference results and metrics to S3
+        """Internal: Capture the inference results and metrics to S3 for a single target

         Args:
             capture_name (str): Name of the inference capture
             pred_results_df (pd.DataFrame): DataFrame with the prediction results
-            target_column (str): Name of the target column
+            target (str): Target column name
             model_type (ModelType): Type of the model (e.g. REGRESSOR, CLASSIFIER)
             metrics (pd.DataFrame): DataFrame with the performance metrics
             description (str): Description of the inference results
@@ -687,86 +855,69 @@ class EndpointCore(Artifact):
         self.log.info(f"Writing metrics to {inference_capture_path}/inference_metrics.csv")
         wr.s3.to_csv(metrics, f"{inference_capture_path}/inference_metrics.csv", index=False)

-        # Grab the target column, prediction column, any _proba columns, and the ID column (if present)
-        prediction_col = "prediction" if "prediction" in pred_results_df.columns else "predictions"
-        output_columns = [target_column, prediction_col]
-
-        # Add any _proba columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.endswith("_proba")]
-
-        # Add any quantile columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.startswith("q_") or col.startswith("qr_")]
-
-        # Add the ID column
-        if id_column and id_column in pred_results_df.columns:
-            output_columns.append(id_column)
-
-        # Write the predictions to our S3 Model Inference Folder
-        self.log.info(f"Writing predictions to {inference_capture_path}/inference_predictions.csv")
-        subset_df = pred_results_df[output_columns]
-        wr.s3.to_csv(subset_df, f"{inference_capture_path}/inference_predictions.csv", index=False)
+        # Save the inference predictions for this target
+        self._save_target_inference(inference_capture_path, pred_results_df, target, id_column)

         # CLASSIFIER: Write the confusion matrix to our S3 Model Inference Folder
         if model_type == ModelType.CLASSIFIER:
-            conf_mtx = self.generate_confusion_matrix(target_column, pred_results_df)
+            conf_mtx = self.generate_confusion_matrix(target, pred_results_df)
             self.log.info(f"Writing confusion matrix to {inference_capture_path}/inference_cm.csv")
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)

-        # Generate SHAP values for our Prediction Dataframe
-        # generate_shap_values(self.endpoint_name, model_type.value, pred_results_df, inference_capture_path)
-
         # Now recompute the details for our Model
-        self.log.important(f"Recomputing Details for {self.model_name} to show latest Inference Results...")
+        self.log.important(f"Loading inference metrics for {self.model_name}...")
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
-        model.details()

-        # Recompute the details so that inference model metrics are updated
-        self.log.important(f"Recomputing Details for {self.name} to show latest Inference Results...")
-        self.details()
+    def _save_target_inference(
+        self,
+        inference_capture_path: str,
+        pred_results_df: pd.DataFrame,
+        target: str,
+        id_column: str = None,
+    ):
+        """Save inference results for a single target.
+
+        Args:
+            inference_capture_path (str): S3 path for inference capture
+            pred_results_df (pd.DataFrame): DataFrame with prediction results
+            target (str): Target column name
+            id_column (str, optional): Name of the ID column
+        """
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
+        output_columns = []
+        if id_column and id_column in cols:
+            output_columns.append(id_column)
+        if target and target in cols:
+            output_columns.append(target)
+
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
+
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
+
+        # Write the predictions to S3
+        output_file = f"{inference_capture_path}/inference_predictions.csv"
+        self.log.info(f"Writing predictions to {output_file}")
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)

-    def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Compute the metrics
-        try:
-            y_true = prediction_df[target_column]
-            prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-            y_pred = prediction_df[prediction_col]
-
-            mae = mean_absolute_error(y_true, y_pred)
-            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
-            r2 = r2_score(y_true, y_pred)
-            # Mean Absolute Percentage Error
-            mape = np.mean(np.where(y_true != 0, np.abs((y_true - y_pred) / y_true), np.abs(y_true - y_pred))) * 100
-            # Median Absolute Error
-            medae = median_absolute_error(y_true, y_pred)
-
-            # Organize and return the metrics
-            metrics = {
-                "MAE": round(mae, 3),
-                "RMSE": round(rmse, 3),
-                "R2": round(r2, 3),
-                "MAPE": round(mape, 3),
-                "MedAE": round(medae, 3),
-                "NumRows": len(prediction_df),
-            }
-            return pd.DataFrame.from_records([metrics])
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)

     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
@@ -776,11 +927,13 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found. Cannot compute residuals.")
+            return prediction_df

-        # Compute the residuals
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]

         # Check for classification scenario
         if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
@@ -794,118 +947,62 @@ class EndpointCore(Artifact):

         return prediction_df

-    @staticmethod
-    def validate_proba_columns(prediction_df: pd.DataFrame, class_labels: list, guessing: bool = False):
-        """Ensure probability columns are correctly aligned with class labels
-
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint

         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")

         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        # Get the class labels from the model
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Calculate precision, recall, fscore, and support, handling zero division
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        scores = precision_recall_fscore_support(
-            prediction_df[target_column],
-            prediction_df[prediction_col],
-            average=None,
-            labels=class_labels,
-            zero_division=0,
-        )
-
-        # Identify the probability columns and keep them as a Pandas DataFrame
-        proba_columns = [f"{label}_proba" for label in class_labels]
-        y_score = prediction_df[proba_columns]
-
-        # One-hot encode the true labels using all class labels (fit with class_labels)
-        encoder = OneHotEncoder(categories=[class_labels], sparse_output=False)
-        y_true = encoder.fit_transform(prediction_df[[target_column]])
-
-        # Calculate ROC AUC per label and handle exceptions for missing classes
-        roc_auc_per_label = []
-        for i, label in enumerate(class_labels):
-            try:
-                roc_auc = roc_auc_score(y_true[:, i], y_score.iloc[:, i])
-            except ValueError as e:
-                self.log.warning(f"ROC AUC calculation failed for label {label}.")
-                self.log.warning(f"{str(e)}")
-                roc_auc = 0.0
-            roc_auc_per_label.append(roc_auc)
-
-        # Put the scores into a DataFrame
-        score_df = pd.DataFrame(
-            {
-                target_column: class_labels,
-                "precision": scores[0],
-                "recall": scores[1],
-                "fscore": scores[2],
-                "roc_auc": roc_auc_per_label,
-                "support": scores[3],
-            }
-        )
-        return score_df
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)

     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found in DataFrame")
+            return pd.DataFrame()
+
+        # Drop rows with NaN predictions (can't include in confusion matrix)
+        nan_mask = prediction_df["prediction"].isna()
+        if nan_mask.any():
+            n_nan = nan_mask.sum()
+            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
+            prediction_df = prediction_df[~nan_mask].copy()

         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]

-        # Check if our model has class labels, if not we'll use the unique labels in the prediction
-        class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            class_labels = sorted(list(set(y_true) | set(y_pred)))
+        # Get model class labels
+        model_class_labels = ModelCore(self.model_name).class_labels()

-        # Compute the confusion matrix (sklearn confusion_matrix)
-        conf_mtx = confusion_matrix(y_true, y_pred, labels=class_labels)
+        # Use model labels if available, otherwise infer from data
+        if model_class_labels:
+            self.log.important("Using model class labels for confusion matrix ordering...")
+            labels = model_class_labels
+        else:
+            labels = sorted(list(set(y_true) | set(y_pred)))

-        # Create a DataFrame
-        conf_mtx_df = pd.DataFrame(conf_mtx, index=class_labels, columns=class_labels)
+        # Compute confusion matrix and create DataFrame
+        conf_mtx = confusion_matrix(y_true, y_pred, labels=labels)
+        conf_mtx_df = pd.DataFrame(conf_mtx, index=labels, columns=labels)
         conf_mtx_df.index.name = "labels"
-
-        # Check if our model has class labels. If so make the index and columns ordered
-        model_class_labels = ModelCore(self.model_name).class_labels()
-        if model_class_labels:
-            self.log.important("Reordering the confusion matrix based on model class labels...")
-            conf_mtx_df.index = pd.Categorical(conf_mtx_df.index, categories=model_class_labels, ordered=True)
-            conf_mtx_df.columns = pd.Categorical(conf_mtx_df.columns, categories=model_class_labels, ordered=True)
-            conf_mtx_df = conf_mtx_df.sort_index().sort_index(axis=1)
         return conf_mtx_df

     def endpoint_config_name(self) -> str:
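
generate_confusion_matrix() now guards against a missing prediction column and drops NaN predictions up front, ordering the matrix by the model's class labels directly instead of reordering it afterwards. A standalone sketch of the new label logic, with a hypothetical stand-in for ModelCore(...).class_labels():

    import pandas as pd
    from sklearn.metrics import confusion_matrix

    df = pd.DataFrame({"wine_class": ["red", "white", "red"],
                       "prediction": ["red", "red", None]})

    # Drop rows with NaN predictions (they can't appear in the matrix)
    df = df[df["prediction"].notna()]

    # Use model class labels if available, otherwise infer from the data
    model_class_labels = None  # stand-in for ModelCore(...).class_labels()
    labels = model_class_labels or sorted(set(df["wine_class"]) | set(df["prediction"]))

    conf_mtx = confusion_matrix(df["wine_class"], df["prediction"], labels=labels)
    conf_mtx_df = pd.DataFrame(conf_mtx, index=labels, columns=labels)
    conf_mtx_df.index.name = "labels"
    print(conf_mtx_df)
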
@@ -932,9 +1029,9 @@ class EndpointCore(Artifact):
         self.upsert_workbench_meta({"workbench_input": input})

     def delete(self):
-        """ "Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
+        """Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
         if not self.exists():
-            self.log.warning(f"Trying to delete an Model that doesn't exist: {self.name}")
+            self.log.warning(f"Trying to delete an Endpoint that doesn't exist: {self.name}")

         # Remove this endpoint from the list of registered endpoints
         self.log.info(f"Removing {self.name} from the list of registered endpoints...")
@@ -975,12 +1072,23 @@ class EndpointCore(Artifact):
             cls.log.info(f"Deleting Monitoring Schedule {schedule['MonitoringScheduleName']}...")
             cls.sm_client.delete_monitoring_schedule(MonitoringScheduleName=schedule["MonitoringScheduleName"])

-        # Recursively delete all endpoint S3 artifacts (inference, data capture, monitoring, etc)
-        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}"
-        s3_objects = wr.s3.list_objects(base_endpoint_path, boto3_session=cls.boto3_session)
-        cls.log.info(f"Deleting S3 Objects at {base_endpoint_path}...")
-        cls.log.info(f"{s3_objects}")
-        wr.s3.delete_objects(s3_objects, boto3_session=cls.boto3_session)
+        # Recursively delete all endpoint S3 artifacts (inference, etc)
+        # Note: We do not want to delete the data_capture/ files since these
+        #       might be used for collection and data drift analysis
+        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}/"
+        all_s3_objects = wr.s3.list_objects(base_endpoint_path, boto3_session=cls.boto3_session)
+
+        # Filter out objects that contain 'data_capture/' in their path
+        s3_objects_to_delete = [obj for obj in all_s3_objects if "/data_capture/" not in obj]
+        cls.log.info(f"Found {len(all_s3_objects)} total objects at {base_endpoint_path}")
+        cls.log.info(f"Filtering out data_capture files, will delete {len(s3_objects_to_delete)} objects...")
+        cls.log.info(f"Objects to delete: {s3_objects_to_delete}")
+
+        if s3_objects_to_delete:
+            wr.s3.delete_objects(s3_objects_to_delete, boto3_session=cls.boto3_session)
+            cls.log.info(f"Successfully deleted {len(s3_objects_to_delete)} objects")
+        else:
+            cls.log.info("No objects to delete (only data_capture files found)")

         # Delete any dataframes that were stored in the Dataframe Cache
         cls.log.info("Deleting Dataframe Cache...")
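
delete() now preserves data_capture/ objects so drift analysis can keep using them; everything else under the endpoint's S3 prefix is still removed. A tiny sketch of the filter on hypothetical S3 keys:

    all_s3_objects = [
        "s3://bucket/endpoints/my-end/inference/auto_inference/inference_metrics.csv",
        "s3://bucket/endpoints/my-end/data_capture/2024/capture.jsonl",
    ]
    s3_objects_to_delete = [obj for obj in all_s3_objects if "/data_capture/" not in obj]
    assert s3_objects_to_delete == [all_s3_objects[0]]
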
@@ -1031,7 +1139,7 @@ class EndpointCore(Artifact):
 if __name__ == "__main__":
     """Exercise the Endpoint Class"""
     from workbench.api import FeatureSet
-    from workbench.utils.endpoint_utils import fs_evaluation_data
+    from workbench.utils.endpoint_utils import get_evaluation_data
     import random

     # Grab an EndpointCore object and pull some information from it
@@ -1039,7 +1147,7 @@ if __name__ == "__main__":

     # Test various error conditions (set row 42 length to pd.NA)
     # Note: This test should return ALL rows
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     my_eval_df.at[42, "length"] = pd.NA
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1047,6 +1155,9 @@ if __name__ == "__main__":
     assert len(pred_results) == len(my_eval_df), "Predictions should match the number of sent rows"

     # Now we put in an invalid value
+    print("*" * 80)
+    print("NOW TESTING ERROR CONDITIONS...")
+    print("*" * 80)
     my_eval_df.at[42, "length"] = "invalid_value"
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1086,13 +1197,20 @@ if __name__ == "__main__":
     df = fs.pull_dataframe()[:100]
     cap_df = df.copy()
     cap_df.columns = [col.upper() for col in cap_df.columns]
-    my_endpoint._predict(cap_df)
+    my_endpoint.inference(cap_df)

     # Boolean Type Test
     df["bool_column"] = [random.choice([True, False]) for _ in range(len(df))]
-    result_df = my_endpoint._predict(df)
+    result_df = my_endpoint.inference(df)
     assert result_df["bool_column"].dtype == bool

+    # Missing Feature Test
+    missing_df = df.drop(columns=["length"])
+    try:
+        my_endpoint.inference(missing_df)
+    except ValueError as e:
+        print(f"Expected error for missing feature: {e}")
+
     # Run Auto Inference on the Endpoint (uses the FeatureSet)
     print("Running Auto Inference...")
     my_endpoint.auto_inference()
@@ -1100,13 +1218,21 @@ if __name__ == "__main__":
     # Run Inference where we provide the data
     # Note: This dataframe could be from a FeatureSet or any other source
     print("Running Inference...")
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df)

     # Now set capture_name to save inference results and metrics
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df, capture_name="holdout_xyz")

+    # Run predictions using the fast_inference method
+    fast_results = my_endpoint.fast_inference(my_eval_df)
+
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = my_endpoint.cross_fold_inference()
+    print(all_results)
+
     # Run Inference and metrics for a Classification Endpoint
     class_endpoint = EndpointCore("wine-classification")
     auto_predictions = class_endpoint.auto_inference()
@@ -1115,8 +1241,11 @@ if __name__ == "__main__":
     target = "wine_class"
     print(class_endpoint.generate_confusion_matrix(target, auto_predictions))

-    # Run predictions using the fast_inference method
-    fast_results = my_endpoint.fast_inference(my_eval_df)
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = class_endpoint.cross_fold_inference()
+    print(all_results)
+    print("All done...")

     # Test the class method delete (commented out for now)
     # from workbench.api import Model