workbench-0.8.174-py3-none-any.whl → workbench-0.8.227-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of workbench has been flagged and may warrant closer review.

Files changed (145)
  1. workbench/__init__.py +1 -0
  2. workbench/algorithms/dataframe/__init__.py +1 -2
  3. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  4. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  5. workbench/algorithms/dataframe/fingerprint_proximity.py +422 -86
  6. workbench/algorithms/dataframe/projection_2d.py +44 -21
  7. workbench/algorithms/dataframe/proximity.py +259 -305
  8. workbench/algorithms/graph/light/proximity_graph.py +12 -11
  9. workbench/algorithms/models/cleanlab_model.py +382 -0
  10. workbench/algorithms/models/noise_model.py +388 -0
  11. workbench/algorithms/sql/column_stats.py +0 -1
  12. workbench/algorithms/sql/correlations.py +0 -1
  13. workbench/algorithms/sql/descriptive_stats.py +0 -1
  14. workbench/algorithms/sql/outliers.py +3 -3
  15. workbench/api/__init__.py +5 -1
  16. workbench/api/df_store.py +17 -108
  17. workbench/api/endpoint.py +14 -12
  18. workbench/api/feature_set.py +117 -11
  19. workbench/api/meta.py +0 -1
  20. workbench/api/meta_model.py +289 -0
  21. workbench/api/model.py +52 -21
  22. workbench/api/parameter_store.py +3 -52
  23. workbench/cached/cached_meta.py +0 -1
  24. workbench/cached/cached_model.py +49 -11
  25. workbench/core/artifacts/__init__.py +11 -2
  26. workbench/core/artifacts/artifact.py +7 -7
  27. workbench/core/artifacts/data_capture_core.py +8 -1
  28. workbench/core/artifacts/df_store_core.py +114 -0
  29. workbench/core/artifacts/endpoint_core.py +323 -205
  30. workbench/core/artifacts/feature_set_core.py +249 -45
  31. workbench/core/artifacts/model_core.py +133 -101
  32. workbench/core/artifacts/parameter_store_core.py +98 -0
  33. workbench/core/cloud_platform/aws/aws_account_clamp.py +48 -2
  34. workbench/core/cloud_platform/cloud_meta.py +0 -1
  35. workbench/core/pipelines/pipeline_executor.py +1 -1
  36. workbench/core/transforms/features_to_model/features_to_model.py +60 -44
  37. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +43 -10
  38. workbench/core/transforms/pandas_transforms/pandas_to_features.py +38 -2
  39. workbench/core/views/training_view.py +113 -42
  40. workbench/core/views/view.py +53 -3
  41. workbench/core/views/view_utils.py +4 -4
  42. workbench/model_script_utils/model_script_utils.py +339 -0
  43. workbench/model_script_utils/pytorch_utils.py +405 -0
  44. workbench/model_script_utils/uq_harness.py +277 -0
  45. workbench/model_scripts/chemprop/chemprop.template +774 -0
  46. workbench/model_scripts/chemprop/generated_model_script.py +774 -0
  47. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  48. workbench/model_scripts/chemprop/requirements.txt +3 -0
  49. workbench/model_scripts/custom_models/chem_info/fingerprints.py +175 -0
  50. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +18 -7
  51. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +80 -58
  52. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +0 -1
  53. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -2
  54. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  55. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +8 -10
  56. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  57. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  58. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  59. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  60. workbench/model_scripts/custom_models/uq_models/ngboost.template +15 -16
  61. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  62. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  63. workbench/model_scripts/meta_model/meta_model.template +209 -0
  64. workbench/model_scripts/pytorch_model/generated_model_script.py +443 -499
  65. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  66. workbench/model_scripts/pytorch_model/pytorch.template +440 -496
  67. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  68. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  69. workbench/model_scripts/pytorch_model/uq_harness.py +277 -0
  70. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  71. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  72. workbench/model_scripts/script_generation.py +15 -12
  73. workbench/model_scripts/uq_models/generated_model_script.py +248 -0
  74. workbench/model_scripts/xgb_model/generated_model_script.py +371 -403
  75. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  76. workbench/model_scripts/xgb_model/uq_harness.py +277 -0
  77. workbench/model_scripts/xgb_model/xgb_model.template +367 -399
  78. workbench/repl/workbench_shell.py +18 -14
  79. workbench/resources/open_source_api.key +1 -1
  80. workbench/scripts/endpoint_test.py +162 -0
  81. workbench/scripts/lambda_test.py +73 -0
  82. workbench/scripts/meta_model_sim.py +35 -0
  83. workbench/scripts/ml_pipeline_sqs.py +122 -6
  84. workbench/scripts/training_test.py +85 -0
  85. workbench/themes/dark/custom.css +59 -0
  86. workbench/themes/dark/plotly.json +5 -5
  87. workbench/themes/light/custom.css +153 -40
  88. workbench/themes/light/plotly.json +9 -9
  89. workbench/themes/midnight_blue/custom.css +59 -0
  90. workbench/utils/aws_utils.py +0 -1
  91. workbench/utils/chem_utils/fingerprints.py +87 -46
  92. workbench/utils/chem_utils/mol_descriptors.py +18 -7
  93. workbench/utils/chem_utils/mol_standardize.py +80 -58
  94. workbench/utils/chem_utils/projections.py +16 -6
  95. workbench/utils/chem_utils/vis.py +25 -27
  96. workbench/utils/chemprop_utils.py +141 -0
  97. workbench/utils/config_manager.py +2 -6
  98. workbench/utils/endpoint_utils.py +5 -7
  99. workbench/utils/license_manager.py +2 -6
  100. workbench/utils/markdown_utils.py +57 -0
  101. workbench/utils/meta_model_simulator.py +499 -0
  102. workbench/utils/metrics_utils.py +256 -0
  103. workbench/utils/model_utils.py +274 -87
  104. workbench/utils/pipeline_utils.py +0 -1
  105. workbench/utils/plot_utils.py +159 -34
  106. workbench/utils/pytorch_utils.py +87 -0
  107. workbench/utils/shap_utils.py +11 -57
  108. workbench/utils/theme_manager.py +95 -30
  109. workbench/utils/xgboost_local_crossfold.py +267 -0
  110. workbench/utils/xgboost_model_utils.py +127 -220
  111. workbench/web_interface/components/experiments/outlier_plot.py +0 -1
  112. workbench/web_interface/components/model_plot.py +16 -2
  113. workbench/web_interface/components/plugin_unit_test.py +5 -3
  114. workbench/web_interface/components/plugins/ag_table.py +2 -4
  115. workbench/web_interface/components/plugins/confusion_matrix.py +3 -6
  116. workbench/web_interface/components/plugins/model_details.py +48 -80
  117. workbench/web_interface/components/plugins/scatter_plot.py +192 -92
  118. workbench/web_interface/components/settings_menu.py +184 -0
  119. workbench/web_interface/page_views/main_page.py +0 -1
  120. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/METADATA +31 -17
  121. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/RECORD +125 -111
  122. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/entry_points.txt +4 -0
  123. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/licenses/LICENSE +1 -1
  124. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  125. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -280
  126. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  127. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  128. workbench/model_scripts/custom_models/proximity/proximity.py +0 -384
  129. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  130. workbench/model_scripts/custom_models/uq_models/mapie.template +0 -502
  131. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -386
  132. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -384
  133. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  134. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  135. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  136. workbench/themes/quartz/base_css.url +0 -1
  137. workbench/themes/quartz/custom.css +0 -117
  138. workbench/themes/quartz/plotly.json +0 -642
  139. workbench/themes/quartz_dark/base_css.url +0 -1
  140. workbench/themes/quartz_dark/custom.css +0 -131
  141. workbench/themes/quartz_dark/plotly.json +0 -642
  142. workbench/utils/fast_inference.py +0 -167
  143. workbench/utils/resource_utils.py +0 -39
  144. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/WHEEL +0 -0
  145. {workbench-0.8.174.dist-info → workbench-0.8.227.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/endpoint_core.py
@@ -12,16 +12,8 @@ from typing import Union, Optional
 import hashlib
 
 # Model Performance Scores
-from sklearn.metrics import (
-    mean_absolute_error,
-    r2_score,
-    median_absolute_error,
-    roc_auc_score,
-    confusion_matrix,
-    precision_recall_fscore_support,
-    mean_squared_error,
-)
-from sklearn.preprocessing import OneHotEncoder
+from sklearn.metrics import confusion_matrix
+from workbench.utils.metrics_utils import compute_regression_metrics, compute_classification_metrics
 
 # SageMaker Imports
 from sagemaker.serializers import CSVSerializer
@@ -30,13 +22,15 @@ from sagemaker import Predictor
 
 # Workbench Imports
 from workbench.core.artifacts.artifact import Artifact
-from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType
+from workbench.core.artifacts import FeatureSetCore, ModelCore, ModelType, ModelFramework
 from workbench.utils.endpoint_metrics import EndpointMetrics
-from workbench.utils.fast_inference import fast_inference
 from workbench.utils.cache import Cache
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.model_utils import uq_metrics
-from workbench.utils.xgboost_model_utils import cross_fold_inference
+from workbench.utils.xgboost_model_utils import pull_cv_results as xgboost_pull_cv
+from workbench.utils.pytorch_utils import pull_cv_results as pytorch_pull_cv
+from workbench.utils.chemprop_utils import pull_cv_results as chemprop_pull_cv
+from workbench_bridges.endpoints.fast_inference import fast_inference
 
 
 class EndpointCore(Artifact):
@@ -336,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True
 
-    def auto_inference(self, capture: bool = False) -> pd.DataFrame:
-        """Run inference on the endpoint using FeatureSet data
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""
 
         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
@@ -349,22 +339,40 @@ class EndpointCore(Artifact):
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Now get the FeatureSet and make sure it exists
-        fs = FeatureSetCore(model.get_input())
-        if not fs.exists():
-            self.log.error("No FeatureSet found for this endpoint. Returning empty DataFrame.")
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()
 
-        # Grab the evaluation data from the FeatureSet
-        table = fs.view("training").table
-        eval_df = fs.query(f'SELECT * FROM "{table}" where training = FALSE')
-        capture_name = "auto_inference" if capture else None
-        return self.inference(eval_df, capture_name, id_column=fs.id_column)
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")
 
     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference and compute performance metrics with optional capture
+        """Run inference on the Endpoint using the provided DataFrame
 
         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
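
Worth noting for callers: auto_inference() no longer takes a capture flag (it always captures its results under the name "auto_inference"), and the new full_inference() scores every row in the model's training view. A minimal usage sketch, assuming an existing endpoint (the endpoint name here is hypothetical):

    from workbench.core.artifacts.endpoint_core import EndpointCore

    end = EndpointCore("abalone-regression-end")  # hypothetical endpoint name
    test_preds = end.auto_inference()  # rows where training == False
    all_preds = end.full_inference()   # every row in the training view
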
@@ -387,7 +395,7 @@ class EndpointCore(Artifact):
         # Grab the model features and target column
         model = ModelCore(self.model_name)
         features = model.features()
-        target_column = model.target()
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
 
         # Run predictions on the evaluation data
         prediction_df = self._predict(eval_df, features, drop_error_rows)
@@ -395,65 +403,213 @@ class EndpointCore(Artifact):
             self.log.warning("No predictions were made. Returning empty DataFrame.")
             return prediction_df
 
+        # Normalize targets to handle both string and list formats
+        if isinstance(targets, list):
+            primary_target = targets[0] if targets else None
+        else:
+            primary_target = targets
+
         # Sanity Check that the target column is present
-        if target_column and (target_column not in prediction_df.columns):
-            self.log.important(f"Target Column {target_column} not found in prediction_df!")
+        if primary_target not in prediction_df.columns:
+            self.log.important(f"Target Column {primary_target} not found in prediction_df!")
             self.log.important("In order to compute metrics, the target column must be present!")
-            return prediction_df
+            metrics = pd.DataFrame()
 
         # Compute the standard performance metrics for this model
-        model_type = model.model_type
-        if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-            prediction_df = self.residuals(target_column, prediction_df)
-            metrics = self.regression_metrics(target_column, prediction_df)
-        elif model_type == ModelType.CLASSIFIER:
-            metrics = self.classification_metrics(target_column, prediction_df)
         else:
-            # For other model types, we don't compute metrics
-            self.log.info(f"Model Type: {model_type} doesn't have metrics...")
-            metrics = pd.DataFrame()
+            if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                prediction_df = self.residuals(primary_target, prediction_df)
+                metrics = self.regression_metrics(primary_target, prediction_df)
+            elif model.model_type == ModelType.CLASSIFIER:
+                metrics = self.classification_metrics(primary_target, prediction_df)
+            else:
+                # For other model types, we don't compute metrics
+                self.log.info(f"Model Type: {model.model_type} doesn't have metrics...")
+                metrics = pd.DataFrame()
 
         # Print out the metrics
-        if not metrics.empty:
-            print(f"Performance Metrics for {self.model_name} on {self.name}")
-            print(metrics.head())
+        print(f"Performance Metrics for {self.model_name} on {self.name}")
+        print(metrics.head())
 
-        # Capture the inference results and metrics
-        if capture_name is not None:
-            description = capture_name.replace("_", " ").title()
-            features = model.features()
-            self._capture_inference_results(
-                capture_name, prediction_df, target_column, model_type, metrics, description, features, id_column
-            )
+        # Capture the inference results and metrics
+        if primary_target and capture_name:
+
+            # If we don't have an id_column, we'll pull it from the model's FeatureSet
+            if id_column is None:
+                fs = FeatureSetCore(model.get_input())
+                id_column = fs.id_column
 
-        # For UQ Models we also capture the uncertainty metrics
-        if model_type in [ModelType.UQ_REGRESSOR]:
-            metrics = uq_metrics(prediction_df, target_column)
+            # Normalize targets to a list for iteration
+            target_list = targets if isinstance(targets, list) else [targets]
+            primary_target = target_list[0]
 
-            # Now put into the Parameter Store Model Inference Namespace
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
+            # For single-target models (99% of cases), just save with capture_name
+            # For multi-target models, save each as {prefix}_{target} plus primary as capture_name
+            is_multi_target = len(target_list) > 1
+
+            if is_multi_target:
+                prefix = "auto" if capture_name == "auto_inference" else capture_name
+
+            for target in target_list:
+                # Drop rows with NaN target values for metrics/plots
+                target_df = prediction_df.dropna(subset=[target])
+
+                # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+                pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+                # Compute per-target metrics
+                if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                    target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+                elif model.model_type == ModelType.CLASSIFIER:
+                    target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+                else:
+                    target_metrics = pd.DataFrame()
+
+                if is_multi_target:
+                    # Multi-target: save as {prefix}_{target}
+                    target_capture_name = f"{prefix}_{target}"
+                    description = target_capture_name.replace("_", " ").title()
+                    self._capture_inference_results(
+                        target_capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        description,
+                        features,
+                        id_column,
+                    )
+
+                # Save primary target (or single target) with original capture_name
+                if target == primary_target:
+                    self._capture_inference_results(
+                        capture_name,
+                        target_df,
+                        target,
+                        model.model_type,
+                        target_metrics,
+                        capture_name.replace("_", " ").title(),
+                        features,
+                        id_column,
+                    )
+
+            # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+            if "prediction_std" in prediction_df.columns:
+                metrics = uq_metrics(prediction_df, primary_target)
+                self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)
 
         # Return the prediction DataFrame
        return prediction_df
 
-    def cross_fold_inference(self, nfolds: int = 5) -> dict:
-        """Run cross-fold inference (only works for XGBoost models)
-
-        Args:
-            nfolds (int): Number of folds to use for cross-fold (default: 5)
+    def cross_fold_inference(self) -> pd.DataFrame:
+        """Pull cross-fold inference training results for this Endpoint's model
 
         Returns:
-            dict: Dictionary with the cross-fold inference results
+            pd.DataFrame: A DataFrame with cross fold predictions
         """
 
         # Grab our model
         model = ModelCore(self.model_name)
 
-        # Compute CrossFold Metrics
-        cross_fold_metrics = cross_fold_inference(model, nfolds=nfolds)
-        if cross_fold_metrics:
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", cross_fold_metrics)
-        return cross_fold_metrics
+        # Compute CrossFold (Metrics and Prediction Dataframe)
+        # For PyTorch and ChemProp, pull pre-computed CV results from training
+        if model.model_framework in [ModelFramework.UNKNOWN, ModelFramework.XGBOOST]:
+            cross_fold_metrics, out_of_fold_df = xgboost_pull_cv(model)
+        elif model.model_framework == ModelFramework.PYTORCH:
+            cross_fold_metrics, out_of_fold_df = pytorch_pull_cv(model)
+        elif model.model_framework == ModelFramework.CHEMPROP:
+            cross_fold_metrics, out_of_fold_df = chemprop_pull_cv(model)
+        else:
+            self.log.error(f"Cross-Fold Inference not supported for Model Framework: {model.model_framework}.")
+            return pd.DataFrame()
+
+        # If the metrics dataframe isn't empty save to the param store
+        if not cross_fold_metrics.empty:
+            # Convert to list of dictionaries
+            metrics = cross_fold_metrics.to_dict(orient="records")
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/cross_fold", metrics)
+
+        # If the out_of_fold_df is empty return it
+        if out_of_fold_df.empty:
+            self.log.warning("No out-of-fold predictions were made. Returning empty DataFrame.")
+            return out_of_fold_df
+
+        # Capture the results
+        targets = model.target()  # Note: We have multi-target models (so this could be a list)
+        model_type = model.model_type
+
+        # Get the id_column from the model's FeatureSet
+        fs = FeatureSetCore(model.get_input())
+        id_column = fs.id_column
+
+        # Normalize targets to a list for iteration
+        target_list = targets if isinstance(targets, list) else [targets]
+        primary_target = target_list[0]
+
+        # If we don't have a smiles column, try to merge it from the FeatureSet
+        if "smiles" not in out_of_fold_df.columns:
+            fs_df = fs.query(f'SELECT {fs.id_column}, "smiles" FROM "{fs.athena_table}"')
+            if "smiles" in fs_df.columns:
+                self.log.info("Merging 'smiles' column from FeatureSet into out-of-fold predictions.")
+                out_of_fold_df = out_of_fold_df.merge(fs_df, on=fs.id_column, how="left")
+
+        # Collect UQ columns (q_*, confidence) for additional tracking (used for hashing)
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
+
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
+
+        # For single-target models (99% of cases), just save as "full_cross_fold"
+        # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
+        is_multi_target = len(target_list) > 1
+        for target in target_list:
+            # Drop rows with NaN target values for metrics/plots
+            target_df = out_of_fold_df.dropna(subset=[target])
+
+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
+            # Compute per-target metrics
+            if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
+            elif model_type == ModelType.CLASSIFIER:
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
+            else:
+                target_metrics = pd.DataFrame()
+
+            if is_multi_target:
+                # Multi-target: save as cv_{target}
+                capture_name = f"cv_{target}"
+                description = capture_name.replace("_", " ").title()
+                self._capture_inference_results(
+                    capture_name,
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    description,
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+            # Save primary target (or single target) as "full_cross_fold"
+            if target == primary_target:
+                self._capture_inference_results(
+                    "full_cross_fold",
+                    target_df,
+                    target,
+                    model_type,
+                    target_metrics,
+                    "Full Cross Fold",
+                    features=additional_columns,
+                    id_column=id_column,
+                )
+
+        return out_of_fold_df
 
     def fast_inference(self, eval_df: pd.DataFrame, threads: int = 4) -> pd.DataFrame:
         """Run inference on the Endpoint using the provided DataFrame
@@ -648,6 +804,10 @@ class EndpointCore(Artifact):
     @staticmethod
     def _hash_dataframe(df: pd.DataFrame, hash_length: int = 8):
         # Internal: Compute a data hash for the dataframe
+        if df.empty:
+            return "--hash--"
+
+        # Sort the dataframe by columns to ensure consistent ordering
         df = df.copy()
         df = df.sort_values(by=sorted(df.columns.tolist()))
         row_hashes = pd.util.hash_pandas_object(df, index=False)
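
The new empty-frame guard aside, the column sort is what makes the hash row-order independent. A small self-contained sketch of that property (plain pandas, mirroring the two lines above):

    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    df2 = df1.sample(frac=1, random_state=0)  # same rows, shuffled order

    h1 = pd.util.hash_pandas_object(df1.sort_values(by=sorted(df1.columns.tolist())), index=False)
    h2 = pd.util.hash_pandas_object(df2.sort_values(by=sorted(df2.columns.tolist())), index=False)
    assert h1.tolist() == h2.tolist()  # identical row hashes regardless of incoming order
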
@@ -658,19 +818,19 @@ class EndpointCore(Artifact):
         self,
         capture_name: str,
         pred_results_df: pd.DataFrame,
-        target_column: str,
+        target: str,
         model_type: ModelType,
         metrics: pd.DataFrame,
         description: str,
         features: list,
         id_column: str = None,
     ):
-        """Internal: Capture the inference results and metrics to S3
+        """Internal: Capture the inference results and metrics to S3 for a single target
 
         Args:
             capture_name (str): Name of the inference capture
             pred_results_df (pd.DataFrame): DataFrame with the prediction results
-            target_column (str): Name of the target column
+            target (str): Target column name
             model_type (ModelType): Type of the model (e.g. REGRESSOR, CLASSIFIER)
             metrics (pd.DataFrame): DataFrame with the performance metrics
             description (str): Description of the inference results
@@ -701,28 +861,12 @@ class EndpointCore(Artifact):
         self.log.info(f"Writing metrics to {inference_capture_path}/inference_metrics.csv")
         wr.s3.to_csv(metrics, f"{inference_capture_path}/inference_metrics.csv", index=False)
 
-        # Grab the target column, prediction column, any _proba columns, and the ID column (if present)
-        prediction_col = "prediction" if "prediction" in pred_results_df.columns else "predictions"
-        output_columns = [target_column, prediction_col]
-
-        # Add any _proba columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.endswith("_proba")]
-
-        # Add any quantile columns to the output columns
-        output_columns += [col for col in pred_results_df.columns if col.startswith("q_") or col.startswith("qr_")]
-
-        # Add the ID column
-        if id_column and id_column in pred_results_df.columns:
-            output_columns.append(id_column)
-
-        # Write the predictions to our S3 Model Inference Folder
-        self.log.info(f"Writing predictions to {inference_capture_path}/inference_predictions.csv")
-        subset_df = pred_results_df[output_columns]
-        wr.s3.to_csv(subset_df, f"{inference_capture_path}/inference_predictions.csv", index=False)
+        # Save the inference predictions for this target
+        self._save_target_inference(inference_capture_path, pred_results_df, target, id_column)
 
         # CLASSIFIER: Write the confusion matrix to our S3 Model Inference Folder
         if model_type == ModelType.CLASSIFIER:
-            conf_mtx = self.generate_confusion_matrix(target_column, pred_results_df)
+            conf_mtx = self.generate_confusion_matrix(target, pred_results_df)
             self.log.info(f"Writing confusion matrix to {inference_capture_path}/inference_cm.csv")
             # Note: Unlike other dataframes here, we want to write the index (labels) to the CSV
             wr.s3.to_csv(conf_mtx, f"{inference_capture_path}/inference_cm.csv", index=True)
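
The inline column bookkeeping moves into _save_target_inference (defined in the next hunk). The selection logic is simple enough to show standalone; this sketch uses a hypothetical results frame with a hypothetical "logS" target:

    import pandas as pd

    pred_df = pd.DataFrame(columns=["id", "logS", "prediction", "prediction_std", "q_05", "q_95", "smiles"])
    cols = pred_df.columns

    output_columns = ["id", "logS"]  # id_column and target, when present
    output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
    output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
    if "smiles" in cols:
        output_columns.append("smiles")
    # -> ["id", "logS", "prediction", "prediction_std", "q_05", "q_95", "smiles"]
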
@@ -732,47 +876,58 @@ class EndpointCore(Artifact):
         model = ModelCore(self.model_name)
         model._load_inference_metrics(capture_name)
 
-    def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def _save_target_inference(
+        self,
+        inference_capture_path: str,
+        pred_results_df: pd.DataFrame,
+        target: str,
+        id_column: str = None,
+    ):
+        """Save inference results for a single target.
+
+        Args:
+            inference_capture_path (str): S3 path for inference capture
+            pred_results_df (pd.DataFrame): DataFrame with prediction results
+            target (str): Target column name
+            id_column (str, optional): Name of the ID column
+        """
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
+        output_columns = []
+        if id_column and id_column in cols:
+            output_columns.append(id_column)
+        if target and target in cols:
+            output_columns.append(target)
+
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
+
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]
+
+        # Add smiles column if present
+        if "smiles" in cols:
+            output_columns.append("smiles")
+
+        # Write the predictions to S3
+        output_file = f"{inference_capture_path}/inference_predictions.csv"
+        self.log.info(f"Writing predictions to {output_file}")
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)
+
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Compute the metrics
-        try:
-            y_true = prediction_df[target_column]
-            prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-            y_pred = prediction_df[prediction_col]
-
-            mae = mean_absolute_error(y_true, y_pred)
-            rmse = np.sqrt(mean_squared_error(y_true, y_pred))
-            r2 = r2_score(y_true, y_pred)
-            # Mean Absolute Percentage Error
-            mape = np.mean(np.where(y_true != 0, np.abs((y_true - y_pred) / y_true), np.abs(y_true - y_pred))) * 100
-            # Median Absolute Error
-            medae = median_absolute_error(y_true, y_pred)
-
-            # Organize and return the metrics
-            metrics = {
-                "MAE": round(mae, 3),
-                "RMSE": round(rmse, 3),
-                "R2": round(r2, 3),
-                "MAPE": round(mape, 3),
-                "MedAE": round(medae, 3),
-                "NumRows": len(prediction_df),
-            }
-            return pd.DataFrame.from_records([metrics])
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)
 
     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Add the residuals to the prediction DataFrame
@@ -782,11 +937,13 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found. Cannot compute residuals.")
+            return prediction_df
 
-        # Compute the residuals
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Check for classification scenario
         if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
@@ -800,85 +957,22 @@ class EndpointCore(Artifact):
 
         return prediction_df
 
-    @staticmethod
-    def validate_proba_columns(prediction_df: pd.DataFrame, class_labels: list, guessing: bool = False):
-        """Ensure probability columns are correctly aligned with class labels
-
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
 
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
 
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        # Get the class labels from the model
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Calculate precision, recall, fscore, and support, handling zero division
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        scores = precision_recall_fscore_support(
-            prediction_df[target_column],
-            prediction_df[prediction_col],
-            average=None,
-            labels=class_labels,
-            zero_division=0,
-        )
-
-        # Identify the probability columns and keep them as a Pandas DataFrame
-        proba_columns = [f"{label}_proba" for label in class_labels]
-        y_score = prediction_df[proba_columns]
-
-        # One-hot encode the true labels using all class labels (fit with class_labels)
-        encoder = OneHotEncoder(categories=[class_labels], sparse_output=False)
-        y_true = encoder.fit_transform(prediction_df[[target_column]])
-
-        # Calculate ROC AUC per label and handle exceptions for missing classes
-        roc_auc_per_label = []
-        for i, label in enumerate(class_labels):
-            try:
-                roc_auc = roc_auc_score(y_true[:, i], y_score.iloc[:, i])
-            except ValueError as e:
-                self.log.warning(f"ROC AUC calculation failed for label {label}.")
-                self.log.warning(f"{str(e)}")
-                roc_auc = 0.0
-            roc_auc_per_label.append(roc_auc)
-
-        # Put the scores into a DataFrame
-        score_df = pd.DataFrame(
-            {
-                target_column: class_labels,
-                "precision": scores[0],
-                "recall": scores[1],
-                "fscore": scores[2],
-                "roc_auc": roc_auc_per_label,
-                "support": scores[3],
-            }
-        )
-        return score_df
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)
 
     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
@@ -890,10 +984,20 @@ class EndpointCore(Artifact):
         Returns:
             pd.DataFrame: DataFrame with the confusion matrix
         """
+        # Check for prediction column
+        if "prediction" not in prediction_df.columns:
+            self.log.warning("No 'prediction' column found in DataFrame")
+            return pd.DataFrame()
+
+        # Drop rows with NaN predictions (can't include in confusion matrix)
+        nan_mask = prediction_df["prediction"].isna()
+        if nan_mask.any():
+            n_nan = nan_mask.sum()
+            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
+            prediction_df = prediction_df[~nan_mask].copy()
 
         y_true = prediction_df[target_column]
-        prediction_col = "prediction" if "prediction" in prediction_df.columns else "predictions"
-        y_pred = prediction_df[prediction_col]
+        y_pred = prediction_df["prediction"]
 
         # Get model class labels
         model_class_labels = ModelCore(self.model_name).class_labels()
@@ -935,9 +1039,9 @@ class EndpointCore(Artifact):
         self.upsert_workbench_meta({"workbench_input": input})
 
     def delete(self):
-        """ "Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
+        """Delete an existing Endpoint: Underlying Models, Configuration, and Endpoint"""
         if not self.exists():
-            self.log.warning(f"Trying to delete an Model that doesn't exist: {self.name}")
+            self.log.warning(f"Trying to delete an Endpoint that doesn't exist: {self.name}")
 
         # Remove this endpoint from the list of registered endpoints
         self.log.info(f"Removing {self.name} from the list of registered endpoints...")
@@ -981,7 +1085,7 @@ class EndpointCore(Artifact):
         # Recursively delete all endpoint S3 artifacts (inference, etc)
         # Note: We do not want to delete the data_capture/ files since these
         # might be used for collection and data drift analysis
-        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}"
+        base_endpoint_path = f"{cls.endpoints_s3_path}/{endpoint_name}/"
        all_s3_objects = wr.s3.list_objects(base_endpoint_path, boto3_session=cls.boto3_session)
 
        # Filter out objects that contain 'data_capture/' in their path
@@ -1045,7 +1149,7 @@ class EndpointCore(Artifact):
 if __name__ == "__main__":
     """Exercise the Endpoint Class"""
     from workbench.api import FeatureSet
-    from workbench.utils.endpoint_utils import fs_evaluation_data
+    from workbench.utils.endpoint_utils import get_evaluation_data
     import random
 
     # Grab an EndpointCore object and pull some information from it
@@ -1053,7 +1157,7 @@ if __name__ == "__main__":
 
     # Test various error conditions (set row 42 length to pd.NA)
     # Note: This test should return ALL rows
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     my_eval_df.at[42, "length"] = pd.NA
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1061,6 +1165,9 @@ if __name__ == "__main__":
     assert len(pred_results) == len(my_eval_df), "Predictions should match the number of sent rows"
 
     # Now we put in an invalid value
+    print("*" * 80)
+    print("NOW TESTING ERROR CONDITIONS...")
+    print("*" * 80)
     my_eval_df.at[42, "length"] = "invalid_value"
     pred_results = my_endpoint.inference(my_eval_df, drop_error_rows=True)
     print(f"Sent rows: {len(my_eval_df)}")
@@ -1121,16 +1228,21 @@ if __name__ == "__main__":
     # Run Inference where we provide the data
     # Note: This dataframe could be from a FeatureSet or any other source
     print("Running Inference...")
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df)
 
     # Now set capture=True to save inference results and metrics
-    my_eval_df = fs_evaluation_data(my_endpoint)
+    my_eval_df = get_evaluation_data(my_endpoint)
     pred_results = my_endpoint.inference(my_eval_df, capture_name="holdout_xyz")
 
     # Run predictions using the fast_inference method
     fast_results = my_endpoint.fast_inference(my_eval_df)
 
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = my_endpoint.cross_fold_inference()
+    print(all_results)
+
     # Run Inference and metrics for a Classification Endpoint
     class_endpoint = EndpointCore("wine-classification")
     auto_predictions = class_endpoint.auto_inference()
@@ -1139,6 +1251,12 @@ if __name__ == "__main__":
     target = "wine_class"
     print(class_endpoint.generate_confusion_matrix(target, auto_predictions))
 
+    # Test the cross_fold_inference method
+    print("Running Cross-Fold Inference...")
+    all_results = class_endpoint.cross_fold_inference()
+    print(all_results)
+    print("All done...")
+
     # Test the class method delete (commented out for now)
     # from workbench.api import Model
     # model = Model("abalone-regression")