workbench-0.8.202-py3-none-any.whl → workbench-0.8.220-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (84)
  1. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  2. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  3. workbench/algorithms/dataframe/fingerprint_proximity.py +421 -85
  4. workbench/algorithms/dataframe/projection_2d.py +44 -21
  5. workbench/algorithms/dataframe/proximity.py +78 -150
  6. workbench/algorithms/graph/light/proximity_graph.py +5 -5
  7. workbench/algorithms/models/cleanlab_model.py +382 -0
  8. workbench/algorithms/models/noise_model.py +388 -0
  9. workbench/algorithms/sql/outliers.py +3 -3
  10. workbench/api/__init__.py +3 -0
  11. workbench/api/df_store.py +17 -108
  12. workbench/api/endpoint.py +13 -11
  13. workbench/api/feature_set.py +111 -8
  14. workbench/api/meta_model.py +289 -0
  15. workbench/api/model.py +45 -12
  16. workbench/api/parameter_store.py +3 -52
  17. workbench/cached/cached_model.py +4 -4
  18. workbench/core/artifacts/artifact.py +5 -5
  19. workbench/core/artifacts/df_store_core.py +114 -0
  20. workbench/core/artifacts/endpoint_core.py +228 -237
  21. workbench/core/artifacts/feature_set_core.py +185 -230
  22. workbench/core/artifacts/model_core.py +34 -26
  23. workbench/core/artifacts/parameter_store_core.py +98 -0
  24. workbench/core/pipelines/pipeline_executor.py +1 -1
  25. workbench/core/transforms/features_to_model/features_to_model.py +22 -10
  26. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +41 -10
  27. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  28. workbench/model_script_utils/model_script_utils.py +339 -0
  29. workbench/model_script_utils/pytorch_utils.py +405 -0
  30. workbench/model_script_utils/uq_harness.py +278 -0
  31. workbench/model_scripts/chemprop/chemprop.template +428 -631
  32. workbench/model_scripts/chemprop/generated_model_script.py +432 -635
  33. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  34. workbench/model_scripts/chemprop/requirements.txt +2 -10
  35. workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
  36. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  37. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
  38. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  39. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  40. workbench/model_scripts/meta_model/meta_model.template +209 -0
  41. workbench/model_scripts/pytorch_model/generated_model_script.py +374 -613
  42. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  43. workbench/model_scripts/pytorch_model/pytorch.template +370 -609
  44. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  45. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  46. workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
  47. workbench/model_scripts/script_generation.py +6 -5
  48. workbench/model_scripts/uq_models/generated_model_script.py +65 -422
  49. workbench/model_scripts/xgb_model/generated_model_script.py +372 -395
  50. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  51. workbench/model_scripts/xgb_model/uq_harness.py +278 -0
  52. workbench/model_scripts/xgb_model/xgb_model.template +366 -396
  53. workbench/repl/workbench_shell.py +0 -5
  54. workbench/resources/open_source_api.key +1 -1
  55. workbench/scripts/endpoint_test.py +2 -2
  56. workbench/scripts/meta_model_sim.py +35 -0
  57. workbench/scripts/training_test.py +85 -0
  58. workbench/utils/chem_utils/fingerprints.py +87 -46
  59. workbench/utils/chem_utils/projections.py +16 -6
  60. workbench/utils/chemprop_utils.py +36 -655
  61. workbench/utils/meta_model_simulator.py +499 -0
  62. workbench/utils/metrics_utils.py +256 -0
  63. workbench/utils/model_utils.py +192 -54
  64. workbench/utils/pytorch_utils.py +33 -472
  65. workbench/utils/shap_utils.py +1 -55
  66. workbench/utils/xgboost_local_crossfold.py +267 -0
  67. workbench/utils/xgboost_model_utils.py +49 -356
  68. workbench/web_interface/components/model_plot.py +7 -1
  69. workbench/web_interface/components/plugins/model_details.py +30 -68
  70. workbench/web_interface/components/plugins/scatter_plot.py +4 -8
  71. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/METADATA +6 -5
  72. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/RECORD +76 -60
  73. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/entry_points.txt +2 -0
  74. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  75. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
  76. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  77. workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
  78. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
  79. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
  80. workbench/model_scripts/uq_models/mapie.template +0 -605
  81. workbench/model_scripts/uq_models/requirements.txt +0 -1
  82. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
  83. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +0 -0
  84. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/endpoint_core.py

@@ -12,16 +12,8 @@ from typing import Union, Optional
 import hashlib
 
 # Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-    confusion_matrix,
-    precision_recall_fscore_support,
-    mean_squared_error,
-)
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import confusion_matrix
+from workbench.utils.metrics_utils import compute_regression_metrics, compute_classification_metrics
 
 # SageMaker Imports
 from sagemaker.serializers import CSVSerializer
@@ -35,7 +27,7 @@ from workbench.utils.endpoint_metrics import EndpointMetrics
 from workbench.utils.cache import Cache
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.model_utils import uq_metrics
-from workbench.utils.xgboost_model_utils import cross_fold_inference as xgboost_cross_fold
+from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
 from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
 from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv
 from workbench_bridges.endpoints.fast_inference import fast_inference
@@ -338,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True
 
-    def auto_inference(self, capture: bool = False) -> pd.DataFrame:
-        """Run inference on the endpoint using FeatureSet data
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""
 
         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
@@ -351,22 +339,40 @@ class EndpointCore(Artifact):
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Now get the FeatureSet and make sure it exists
-        fs = FeatureSetCore(model.get_input())
-        if not fs.exists():
-            self.log.error("No FeatureSet found for this endpoint. Returning empty DataFrame.")
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Grab the evaluation data from the FeatureSet
-        table = model.training_view().table
-        eval_df = fs.query(f'SELECT * FROM "{table}" where training = FALSE')
-        capture_name = "auto_inference" if capture else None
-        return self.inference(eval_df, capture_name, id_column=fs.id_column)
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")
 
     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference and compute performance metrics with optional capture
+        """Run inference on the Endpoint using the provided DataFrame
 
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
@@ -389,7 +395,7 @@ class EndpointCore(Artifact):
         # Grab the model features and target column
         model = ModelCore(self.model_name)
         features = model.features()
-        target_column = model.target()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
 
         # Run predictions on the evaluation data
         prediction_df = self._predict(eval_df, features, drop_error_rows)
@@ -397,19 +403,25 @@ class EndpointCore(Artifact):
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df
 
+        # Normalize targets to handle both string and list formats
+        if isinstance(targets, list):
+            primary_target = targets[0] if targets else None
+        else:
+            primary_target = targets
+
         # Sanity Check that the target column is present
-        if target_column and (target_column not in prediction_df.columns):
-            self.log.important(f"Target Column {target_column} not found in prediction_df!")
+        if primary_target not in prediction_df.columns:
+            self.log.important(f"Target Column {primary_target} not found in prediction_df!")
             self.log.important("In order to compute metrics, the target column must be present!")
             metrics = pd.DataFrame()
 
         # Compute the standard performance metrics for this model
         else:
             if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-                prediction_df = self.residuals(target_column, prediction_df)
-                metrics = self.regression_metrics(target_column, prediction_df)
+                prediction_df = self.residuals(primary_target, prediction_df)
+                metrics = self.regression_metrics(primary_target, prediction_df)
             elif model.model_type == ModelType.CLASSIFIER:
-                metrics = self.classification_metrics(target_column, prediction_df)
+                metrics = self.classification_metrics(primary_target, prediction_df)
             else:
                 # For other model types, we don't compute metrics
                 self.log.info(f"Model Type: {model.model_type} doesn't have metrics...")
@@ -420,30 +432,77 @@ class EndpointCore(Artifact):
             print(metrics.head())
 
         # Capture the inference results and metrics
-        if capture_name is not None:
+        if primary_target and capture_name:
 
             # If we don't have an id_column, we'll pull it from the model's FeatureSet
             if id_column is None:
                 fs = FeatureSetCore(model.get_input())
                 id_column = fs.id_column
-            description = capture_name.replace("_", " ").title()
-            self._capture_inference_results(
-                capture_name, prediction_df, target_column, model.model_type, metrics, description, features, id_column
-            )
-
-            # For UQ Models we also capture the uncertainty metrics
-            if model.model_type in [ModelType.UQ_REGRESSOR]:
-                metrics = uq_metrics(prediction_df, target_column)
+
+            # Normalize targets to a list for iteration
+            target_list = targets if isinstance(targets, list) else [targets]
+            primary_target = target_list[0]
+
+            # For single-target models (99% of cases), just save with capture_name
+            # For multi-target models, save each as {prefix}_{target} plus primary as capture_name
+            is_multi_target = len(target_list) > 1
+
+            if is_multi_target:
+                prefix = "auto" if capture_name == "auto_inference" else capture_name
+
+            for target in target_list:
+                # Drop rows with NaN target values for metrics/plots
+                target_df = prediction_df.dropna(subset=[target])
+
+                # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+                pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+                # Compute per-target metrics
+                if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                    target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+                elif model.model_type == ModelType.CLASSIFIER:
+                    target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+                else:
+                    target_metrics = pd.DataFrame()
+
+                if is_multi_target:
+                    # Multi-target: save as {prefix}_{target}
+                    target_capture_name = f"{prefix}_{target}"
+                    description = target_capture_name.replace("_", " ").title()
+                    self._capture_inference_results(
+                        target_capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        description,
+                        features,
+                        id_column,
+                    )
+
+                # Save primary target (or single target) with original capture_name
+                if target == primary_target:
+                    self._capture_inference_results(
+                        capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        capture_name.replace("_", " ").title(),
+                        features,
+                        id_column,
+                    )
+
+            # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+            if "prediction_std" in prediction_df.columns:
+                metrics = uq_metrics(prediction_df, primary_target)
                 self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
 
         # Return the prediction DataFrame
         return prediction_df
 
-    def cross_fold_inference(self, nfolds: int = 5) -> pd.DataFrame:
-        """Run cross-fold inference (only works for XGBoost models)
-
-        Args:
-            nfolds (int): Number of folds to use for cross-fold (default: 5)
+    def cross_fold_inference(self) -> pd.DataFrame:
+        """Pull cross-fold inference training results for this Endpoint's model
 
         Returns:
             pd.DataFrame: A DataFrame with cross fold predictions
@@ -455,8 +514,8 @@ class EndpointCore(Artifact):
         # Compute CrossFold (Metrics and Prediction Dataframe)
         # For PyTorch and ChemProp, pull pre-computed CV results from training
         if model.model_framework in [ModelFramework.UNKNOWN, ModelFramework.XGBOOST]:
-            cross_fold_metrics, out_of_fold_df = xgboost_cross_fold(model, nfolds=nfolds)
-        elif model.model_framework == ModelFramework.PYTORCH_TABULAR:
+            cross_fold_metrics, out_of_fold_df = xgboost_pull_cv(model)
+        elif model.model_framework == ModelFramework.PYTORCH:
             cross_fold_metrics, out_of_fold_df = pytorch_pull_cv(model)
         elif model.model_framework == ModelFramework.CHEMPROP:
             cross_fold_metrics, out_of_fold_df = chemprop_pull_cv(model)
@@ -476,58 +535,74 @@ class EndpointCore(Artifact):
             return out_of_fold_df
 
         # Capture the results
-        capture_name = "full_cross_fold"
-        description = capture_name.replace("_", " ").title()
-        target_column = model.target()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
         model_type = model.model_type
 
         # Get the id_column from the model's FeatureSet
         fs = FeatureSetCore(model.get_input())
         id_column = fs.id_column
 
-        # Is this a UQ Model? If so, run full inference and merge the results
-        additional_columns = []
-        if model.model_framework == ModelFramework.XGBOOST and model_type == ModelType.UQ_REGRESSOR:
-            self.log.important("UQ Regressor detected, running full inference to get uncertainty estimates...")
+        # Normalize targets to a list for iteration
+        target_list = targets if isinstance(targets, list) else [targets]
+        primary_target = target_list[0]
 
-            # Get the training view dataframe for inference
-            training_df = model.training_view().pull_dataframe()
+        # Collect UQ columns (q_*, confidence) for additional tracking
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
 
-            # Run inference on the endpoint to get UQ outputs
-            uq_df = self.inference(training_df)
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
 
-            # Identify UQ-specific columns (quantiles and prediction_std)
-            uq_columns = [
-                col for col in uq_df.columns if col.startswith("q_") or col == "prediction_std" or col == "confidence"
-            ]
+        # For single-target models (99% of cases), just save as "full_cross_fold"
+        # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
+        is_multi_target = len(target_list) > 1
 
-            # Merge UQ columns with out-of-fold predictions
-            if uq_columns:
-                # Keep id_column and UQ columns, drop 'prediction' to avoid conflict when merging
-                uq_df = uq_df[[id_column] + uq_columns]
+        for target in target_list:
+            # Drop rows with NaN target values for metrics/plots
+            target_df = out_of_fold_df.dropna(subset=[target])
 
-                # Drop duplicates in uq_df based on id_column
-                uq_df = uq_df.drop_duplicates(subset=[id_column])
+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
 
-                # Merge UQ columns into out_of_fold_df
-                out_of_fold_df = pd.merge(out_of_fold_df, uq_df, on=id_column, how="left")
-                additional_columns = uq_columns
-                self.log.info(f"Added UQ columns: {', '.join(additional_columns)}")
-
-                # Also compute UQ metrics
-                metrics = uq_metrics(out_of_fold_df, target_column)
-                self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
+            # Compute per-target metrics
+            if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+            elif model_type == ModelType.CLASSIFIER:
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+            else:
+                target_metrics = pd.DataFrame()
+
+            if is_multi_target:
+                # Multi-target: save as cv_{target}
+                capture_name = f"cv_{target}"
+                description = capture_name.replace("_", " ").title()
+                self._capture_inference_results(
+                    capture_name,
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    description,
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+            # Save primary target (or single target) as "full_cross_fold"
+            if target == primary_target:
+                self._capture_inference_results(
+                    "full_cross_fold",
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    "Full Cross Fold",
+                    features=additional_columns,
+                    id_column=id_column,
+                )
 
-        self._capture_inference_results(
-            capture_name,
-            out_of_fold_df,
-            target_column,
-            model_type,
-            cross_fold_metrics,
-            description,
-            features=additional_columns,
-            id_column=id_column,
-        )
         return out_of_fold_df
 
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
@@ -737,19 +812,19 @@ class EndpointCore(Artifact):
         self,
         capture_name: str,
         pred_results_df: pd.DataFrame,
-        target_column: str,
+        target: str,
         model_type: ModelType,
         metrics: pd.DataFrame,
         description: str,
         features: list,
         id_column: str = None,
     ):
-        """Internal: Capture the inference results and metrics to S3
+        """Internal: Capture the inference results and metrics to S3 for a single target
 
         Args:
             capture_name (str): Name of the inference capture
             pred_results_df (pd.DataFrame): DataFrame with the prediction results
-            target_column (str): Name of the target column
+            target (str): Target column name
             model_type (ModelType): Type of the model (e.g. REGRESSOR, CLASSIFIER)
             metrics (pd.DataFrame): DataFrame with the performance metrics
             description (str): Description of the inference results
@@ -780,26 +855,12 @@ class EndpointCore(Artifact):
         self.log.info(f"Writing metrics to {inference_capture_path}/inference_metrics.csv")
         wr.s3.to_csv(metrics, f"{inference_capture_path}/inference_metrics.csv", index=False)
 
-        # Grab the ID column and target column if they are present
-        output_columns = []
-        if id_column and id_column in pred_results_df.columns:
-            output_columns.append(id_column)
-        if target_column in pred_results_df.columns:
-            output_columns.append(target_column)
-
-        # Grab the prediction column, any _proba columns, and UQ columns
-        output_columns += [col for col in pred_results_df.columns if "prediction" in col]
-        output_columns += [col for col in pred_results_df.columns if col.endswith("_proba")]
-        output_columns += [col for col in pred_results_df.columns if col.startswith("q_") or col == "confidence"]
-
-        # Write the predictions to our S3 Model Inference Folder
-        self.log.info(f"Writing predictions to {inference_capture_path}/inference_predictions.csv")
-        subset_df = pred_results_df[output_columns]
-        wr.s3.to_csv(subset_df, f"{inference_capture_path}/inference_predictions.csv", index=False)
+        # Save the inference predictions for this target
+        self._save_target_inference(inference_capture_path, pred_results_df, target, id_column)
 
         # CLASSIFIER: Write the confusion matrix to our S3 Model Inference Folder
         if model_type == ModelType.CLASSIFIER:
-            conf_mtx = self.generate_confusion_matrix(target_column, pred_results_df)
+            conf_mtx = self.generate_confusion_matrix(target, pred_results_df)
             self.log.info(f"Writing confusion matrix to {inference_capture_path}/inference_cm.csv")
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)
@@ -809,60 +870,54 @@ class EndpointCore(Artifact):
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
 
-    def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def _save_target_inference(
+        self,
+        inference_capture_path: str,
+        pred_results_df: pd.DataFrame,
+        target: str,
+        id_column: str = None,
+    ):
+        """Save inference results for a single target.
+
+        Args:
+            inference_capture_path (str): S3 path for inference capture
+            pred_results_df (pd.DataFrame): DataFrame with prediction results
+            target (str): Target column name
+            id_column (str, optional): Name of the ID column
+        """
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
+        output_columns = []
+        if id_column and id_column in cols:
+            output_columns.append(id_column)
+        if target and target in cols:
+            output_columns.append(target)
+
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
+
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
+
+        # Write the predictions to S3
+        output_file = f"{inference_capture_path}/inference_predictions.csv"
+        self.log.info(f"Writing predictions to {output_file}")
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)
+
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Check for NaN values in target or prediction columns
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        if prediction_df[target_column].isnull().any() or prediction_df[prediction_col].isnull().any():
-            # Compute the number of NaN values in each column
-            num_nan_target = prediction_df[target_column].isnull().sum()
-            num_nan_prediction = prediction_df[prediction_col].isnull().sum()
-            self.log.warning(
-                f"NaNs Found: {target_column} {num_nan_target} and {prediction_col}: {num_nan_prediction}."
-            )
-            self.log.warning(
-                "NaN values found in target or prediction columns. Dropping NaN rows for metric computation."
-            )
-            prediction_df = prediction_df.dropna(subset=[target_column, prediction_col])
-
-        # Compute the metrics
-        try:
-            y_true = prediction_df[target_column]
-            y_pred = prediction_df[prediction_col]
-
-            mae = mean_absolute_error(y_true, y_pred)
-            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
-            r2 = r2_score(y_true, y_pred)
-            # Mean Absolute Percentage Error
-            mape = np.mean(np.where(y_true != 0, np.abs((y_true - y_pred) / y_true), np.abs(y_true - y_pred))) * 100
-            # Median Absolute Error
-            medae = median_absolute_error(y_true, y_pred)
-
-            # Organize and return the metrics
-            metrics = {
-                "MAE": round(mae, 3),
-                "RMSE": round(rmse, 3),
-                "R2": round(r2, 3),
-                "MAPE": round(mape, 3),
-                "MedAE": round(medae, 3),
-                "NumRows": len(prediction_df),
-            }
-            return pd.DataFrame.from_records([metrics])
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)
 
     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Add the residuals to the prediction DataFrame
@@ -872,11 +927,13 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found. Cannot compute residuals.")
+            return prediction_df
 
-        # Compute the residuals
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Check for classification scenario
         if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
@@ -890,92 +947,22 @@ class EndpointCore(Artifact):
 
         return prediction_df
 
-    @staticmethod
-    def validate_proba_columns(prediction_df: pd.DataFrame, class_labels: list, guessing: bool = False):
-        """Ensure probability columns are correctly aligned with class labels
-
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
 
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
 
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        # Drop rows with NaN predictions (can't compute metrics on missing predictions)
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        nan_mask = prediction_df[prediction_col].isna()
-        if nan_mask.any():
-            n_nan = nan_mask.sum()
-            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for metrics calculation")
-            prediction_df = prediction_df[~nan_mask].copy()
-
-        # Get the class labels from the model
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Calculate precision, recall, f1, and support, handling zero division
-        scores = precision_recall_fscore_support(
-            prediction_df[target_column],
-            prediction_df[prediction_col],
-            average=None,
-            labels=class_labels,
-            zero_division=0,
-        )
-
-        # Identify the probability columns and keep them as a Pandas DataFrame
-        proba_columns = [f"{label}_proba" for label in class_labels]
-        y_score = prediction_df[proba_columns]
-
-        # One-hot encode the true labels using all class labels (fit with class_labels)
-        encoder = OneHotEncoder(categories=[class_labels], sparse_output=False)
-        y_true = encoder.fit_transform(prediction_df[[target_column]])
-
-        # Calculate ROC AUC per label and handle exceptions for missing classes
-        roc_auc_per_label = []
-        for i, label in enumerate(class_labels):
-            try:
-                roc_auc = roc_auc_score(y_true[:, i], y_score.iloc[:, i])
-            except ValueError as e:
-                self.log.warning(f"ROC AUC calculation failed for label {label}.")
-                self.log.warning(f"{str(e)}")
-                roc_auc = 0.0
-            roc_auc_per_label.append(roc_auc)
-
-        # Put the scores into a DataFrame
-        score_df = pd.DataFrame(
-            {
-                target_column: class_labels,
-                "precision": scores[0],
-                "recall": scores[1],
-                "f1": scores[2],
-                "roc_auc": roc_auc_per_label,
-                "support": scores[3],
-            }
-        )
-        return score_df
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)
 
     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
@@ -987,16 +974,20 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found in DataFrame")
+            return pd.DataFrame()
+
         # Drop rows with NaN predictions (can't include in confusion matrix)
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        nan_mask = prediction_df[prediction_col].isna()
+        nan_mask = prediction_df["prediction"].isna()
         if nan_mask.any():
             n_nan = nan_mask.sum()
             self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
             prediction_df = prediction_df[~nan_mask].copy()
 
         y_true = prediction_df[target_column]
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Get model class labels
         model_class_labels = ModelCore(self.model_name).class_labels()
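Taken together, the endpoint_core.py changes above simplify the inference API: auto_inference() and the new full_inference() take no arguments and pull their rows from the model's training view, and cross_fold_inference() now pulls the cross-fold results pre-computed during training instead of accepting an nfolds argument. The following is a minimal usage sketch, not part of the diff: it assumes an EndpointCore can be constructed from an existing endpoint name (the constructor is not shown here), and the endpoint name used is purely illustrative.

# Usage sketch (hypothetical endpoint name; EndpointCore constructor not shown in this diff)
from workbench.core.artifacts.endpoint_core import EndpointCore

end = EndpointCore("my-regression-endpoint")  # assumption: name-based lookup

# Hold-out rows from the model's training view (training == False), captured as "auto_inference"
holdout_preds = end.auto_inference()

# Every row in the model's training view, captured as "full_inference"
all_preds = end.full_inference()

# Pre-computed cross-fold results from training (the nfolds argument is gone in 0.8.220)
oof_preds = end.cross_fold_inference()

# Ad-hoc inference on your own DataFrame, captured under a custom name
# preds = end.inference(my_df, capture_name="my_eval", id_column="id")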