workbench-0.8.212-py3-none-any.whl → workbench-0.8.217-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  2. workbench/algorithms/dataframe/fingerprint_proximity.py +257 -80
  3. workbench/algorithms/dataframe/projection_2d.py +38 -21
  4. workbench/algorithms/dataframe/proximity.py +75 -150
  5. workbench/algorithms/graph/light/proximity_graph.py +5 -5
  6. workbench/algorithms/models/cleanlab_model.py +382 -0
  7. workbench/algorithms/models/noise_model.py +2 -2
  8. workbench/api/__init__.py +3 -0
  9. workbench/api/endpoint.py +10 -5
  10. workbench/api/feature_set.py +76 -6
  11. workbench/api/meta_model.py +289 -0
  12. workbench/api/model.py +43 -4
  13. workbench/core/artifacts/endpoint_core.py +75 -129
  14. workbench/core/artifacts/feature_set_core.py +1 -1
  15. workbench/core/artifacts/model_core.py +6 -4
  16. workbench/core/pipelines/pipeline_executor.py +1 -1
  17. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +30 -10
  18. workbench/model_script_utils/pytorch_utils.py +11 -1
  19. workbench/model_scripts/chemprop/chemprop.template +145 -69
  20. workbench/model_scripts/chemprop/generated_model_script.py +147 -71
  21. workbench/model_scripts/custom_models/chem_info/fingerprints.py +7 -3
  22. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  23. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
  24. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  25. workbench/model_scripts/custom_models/uq_models/meta_uq.template +6 -6
  26. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  27. workbench/model_scripts/meta_model/meta_model.template +209 -0
  28. workbench/model_scripts/pytorch_model/generated_model_script.py +42 -24
  29. workbench/model_scripts/pytorch_model/pytorch.template +42 -24
  30. workbench/model_scripts/pytorch_model/pytorch_utils.py +11 -1
  31. workbench/model_scripts/script_generation.py +4 -0
  32. workbench/model_scripts/xgb_model/generated_model_script.py +169 -158
  33. workbench/model_scripts/xgb_model/xgb_model.template +163 -152
  34. workbench/repl/workbench_shell.py +0 -5
  35. workbench/scripts/endpoint_test.py +2 -2
  36. workbench/utils/chem_utils/fingerprints.py +7 -3
  37. workbench/utils/chemprop_utils.py +23 -5
  38. workbench/utils/meta_model_simulator.py +471 -0
  39. workbench/utils/metrics_utils.py +94 -10
  40. workbench/utils/model_utils.py +91 -9
  41. workbench/utils/pytorch_utils.py +1 -1
  42. workbench/web_interface/components/plugins/scatter_plot.py +4 -8
  43. {workbench-0.8.212.dist-info → workbench-0.8.217.dist-info}/METADATA +2 -1
  44. {workbench-0.8.212.dist-info → workbench-0.8.217.dist-info}/RECORD +48 -43
  45. workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
  46. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
  47. {workbench-0.8.212.dist-info → workbench-0.8.217.dist-info}/WHEEL +0 -0
  48. {workbench-0.8.212.dist-info → workbench-0.8.217.dist-info}/entry_points.txt +0 -0
  49. {workbench-0.8.212.dist-info → workbench-0.8.217.dist-info}/licenses/LICENSE +0 -0
  50. {workbench-0.8.212.dist-info → workbench-0.8.217.dist-info}/top_level.txt +0 -0
workbench/api/meta_model.py ADDED
@@ -0,0 +1,289 @@
+"""MetaModel: A Model that aggregates predictions from multiple child endpoints.
+
+MetaModels don't train on feature data - they combine predictions from existing
+endpoints using confidence-weighted voting. This provides ensemble benefits
+across different model frameworks (XGBoost, PyTorch, ChemProp, etc.).
+"""
+
+from pathlib import Path
+import time
+import logging
+
+from sagemaker.estimator import Estimator
+
+# Workbench Imports
+from workbench.api.model import Model
+from workbench.api.endpoint import Endpoint
+from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework, ModelImages
+from workbench.core.artifacts.artifact import Artifact
+from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
+from workbench.model_scripts.script_generation import generate_model_script
+from workbench.utils.config_manager import ConfigManager
+from workbench.utils.model_utils import supported_instance_types
+
+# Set up logging
+log = logging.getLogger("workbench")
+
+
+class MetaModel(Model):
+    """MetaModel: A Model that aggregates predictions from child endpoints.
+
+    Common Usage:
+        ```python
+        # Create a meta model from existing endpoints
+        meta = MetaModel.create(
+            name="my-meta-model",
+            child_endpoints=["endpoint-1", "endpoint-2", "endpoint-3"],
+            target_column="target"
+        )
+
+        # Deploy like any other model
+        endpoint = meta.to_endpoint()
+        ```
+    """
+
+    @classmethod
+    def create(
+        cls,
+        name: str,
+        child_endpoints: list[str],
+        target_column: str,
+        description: str = None,
+        tags: list[str] = None,
+    ) -> "MetaModel":
+        """Create a new MetaModel from a list of child endpoints.
+
+        Args:
+            name: Name for the meta model
+            child_endpoints: List of endpoint names to aggregate
+            target_column: Name of the target column (for metadata)
+            description: Optional description for the model
+            tags: Optional list of tags
+
+        Returns:
+            MetaModel: The created meta model
+        """
+        Artifact.is_name_valid(name, delimiter="-", lower_case=False)
+
+        # Validate endpoints and get lineage info from primary endpoint
+        feature_list, feature_set_name, model_weights = cls._validate_and_get_lineage(child_endpoints)
+
+        # Delete existing model if it exists
+        log.important(f"Trying to delete existing model {name}...")
+        ModelCore.managed_delete(name)
+
+        # Run training and register model
+        aws_clamp = AWSAccountClamp()
+        estimator = cls._run_training(name, child_endpoints, target_column, model_weights, aws_clamp)
+        cls._register_model(name, child_endpoints, description, tags, estimator, aws_clamp)
+
+        # Set metadata and onboard
+        cls._set_metadata(name, target_column, feature_list, feature_set_name, child_endpoints)
+
+        log.important(f"MetaModel {name} created successfully!")
+        return cls(name)
+
+    @classmethod
+    def _validate_and_get_lineage(cls, child_endpoints: list[str]) -> tuple[list[str], str, dict[str, float]]:
+        """Validate child endpoints exist and get lineage info from primary endpoint.
+
+        Args:
+            child_endpoints: List of endpoint names
+
+        Returns:
+            tuple: (feature_list, feature_set_name, model_weights) from the primary endpoint's model
+        """
+        log.info("Verifying child endpoints and gathering model metrics...")
+        mae_scores = {}
+
+        for ep_name in child_endpoints:
+            ep = Endpoint(ep_name)
+            if not ep.exists():
+                raise ValueError(f"Child endpoint '{ep_name}' does not exist")
+
+            # Get model MAE from full_inference metrics
+            model = Model(ep.get_input())
+            metrics = model.get_inference_metrics("full_inference")
+            if metrics is not None and "mae" in metrics.columns:
+                mae = float(metrics["mae"].iloc[0])
+                mae_scores[ep_name] = mae
+                log.info(f" {ep_name} -> {model.name}: MAE={mae:.4f}")
+            else:
+                log.warning(f" {ep_name}: No full_inference metrics found, using default weight")
+                mae_scores[ep_name] = None
+
+        # Compute inverse-MAE weights (higher weight for lower MAE)
+        valid_mae = {k: v for k, v in mae_scores.items() if v is not None}
+        if valid_mae:
+            inv_mae = {k: 1.0 / v for k, v in valid_mae.items()}
+            total = sum(inv_mae.values())
+            model_weights = {k: v / total for k, v in inv_mae.items()}
+            # Fill in missing weights with equal share of remaining weight
+            missing = [k for k in mae_scores if mae_scores[k] is None]
+            if missing:
+                equal_weight = (1.0 - sum(model_weights.values())) / len(missing)
+                for k in missing:
+                    model_weights[k] = equal_weight
+        else:
+            # No metrics available, use equal weights
+            model_weights = {k: 1.0 / len(child_endpoints) for k in child_endpoints}
+            log.warning("No MAE metrics found, using equal weights")
+
+        log.info(f"Model weights: {model_weights}")
+
+        # Use first endpoint as primary - backtrack to get model and feature set
+        primary_endpoint = Endpoint(child_endpoints[0])
+        primary_model = Model(primary_endpoint.get_input())
+        feature_list = primary_model.features()
+        feature_set_name = primary_model.get_input()
+
+        log.info(
+            f"Primary endpoint: {child_endpoints[0]} -> Model: {primary_model.name} -> FeatureSet: {feature_set_name}"
+        )
+        return feature_list, feature_set_name, model_weights
+
+    @classmethod
+    def _run_training(
+        cls,
+        name: str,
+        child_endpoints: list[str],
+        target_column: str,
+        model_weights: dict[str, float],
+        aws_clamp: AWSAccountClamp,
+    ) -> Estimator:
+        """Run the minimal training job that saves the meta model config.
+
+        Args:
+            name: Model name
+            child_endpoints: List of endpoint names
+            target_column: Target column name
+            model_weights: Dict mapping endpoint name to weight
+            aws_clamp: AWS account clamp
+
+        Returns:
+            Estimator: The fitted estimator
+        """
+        sm_session = aws_clamp.sagemaker_session()
+        cm = ConfigManager()
+        workbench_bucket = cm.get_config("WORKBENCH_BUCKET")
+        models_s3_path = f"s3://{workbench_bucket}/models"
+
+        # Generate the model script from template
+        template_params = {
+            "model_type": ModelType.REGRESSOR,
+            "model_framework": ModelFramework.META,
+            "child_endpoints": child_endpoints,
+            "target_column": target_column,
+            "model_weights": model_weights,
+            "model_metrics_s3_path": f"{models_s3_path}/{name}/training",
+            "aws_region": sm_session.boto_region_name,
+        }
+        script_path = generate_model_script(template_params)
+
+        # Create estimator
+        training_image = ModelImages.get_image_uri(sm_session.boto_region_name, "meta_training")
+        log.info(f"Using Meta Training Image: {training_image}")
+        estimator = Estimator(
+            entry_point=Path(script_path).name,
+            source_dir=str(Path(script_path).parent),
+            role=aws_clamp.aws_session.get_workbench_execution_role_arn(),
+            instance_count=1,
+            instance_type="ml.m5.large",
+            sagemaker_session=sm_session,
+            image_uri=training_image,
+        )
+
+        # Run training (no input data needed - just saves config)
+        log.important(f"Creating MetaModel {name}...")
+        estimator.fit()
+
+        return estimator
+
+    @classmethod
+    def _register_model(
+        cls,
+        name: str,
+        child_endpoints: list[str],
+        description: str,
+        tags: list[str],
+        estimator: Estimator,
+        aws_clamp: AWSAccountClamp,
+    ):
+        """Create model group and register the model.
+
+        Args:
+            name: Model name
+            child_endpoints: List of endpoint names
+            description: Model description
+            tags: Model tags
+            estimator: Fitted estimator
+            aws_clamp: AWS account clamp
+        """
+        sm_session = aws_clamp.sagemaker_session()
+        model_description = description or f"Meta model aggregating: {', '.join(child_endpoints)}"
+
+        # Create model group
+        aws_clamp.sagemaker_client().create_model_package_group(
+            ModelPackageGroupName=name,
+            ModelPackageGroupDescription=model_description,
+            Tags=[{"Key": "workbench_tags", "Value": "::".join(tags or [name])}],
+        )
+
+        # Register the model with meta inference image
+        inference_image = ModelImages.get_image_uri(sm_session.boto_region_name, "meta_inference")
+        log.important(f"Registering model {name} with Inference Image {inference_image}...")
+        estimator.create_model(role=aws_clamp.aws_session.get_workbench_execution_role_arn()).register(
+            model_package_group_name=name,
+            image_uri=inference_image,
+            content_types=["text/csv"],
+            response_types=["text/csv"],
+            inference_instances=supported_instance_types("x86_64"),
+            transform_instances=["ml.m5.large", "ml.m5.xlarge"],
+            approval_status="Approved",
+            description=model_description,
+        )
+
+    @classmethod
+    def _set_metadata(
+        cls, name: str, target_column: str, feature_list: list[str], feature_set_name: str, child_endpoints: list[str]
+    ):
+        """Set model metadata and onboard.
+
+        Args:
+            name: Model name
+            target_column: Target column name
+            feature_list: List of feature names
+            feature_set_name: Name of the input FeatureSet
+            child_endpoints: List of child endpoint names
+        """
+        time.sleep(3)
+        output_model = ModelCore(name)
+        output_model._set_model_type(ModelType.UQ_REGRESSOR)
+        output_model._set_model_framework(ModelFramework.META)
+        output_model.set_input(feature_set_name, force=True)
+        output_model.upsert_workbench_meta({"workbench_model_target": target_column})
+        output_model.upsert_workbench_meta({"workbench_model_features": feature_list})
+        output_model.upsert_workbench_meta({"child_endpoints": child_endpoints})
+        output_model.onboard_with_args(ModelType.UQ_REGRESSOR, target_column, feature_list=feature_list)
+
+
+if __name__ == "__main__":
+    """Exercise the MetaModel Class"""
+
+    meta = MetaModel.create(
+        name="logd-meta",
+        child_endpoints=["logd-xgb", "logd-pytorch", "logd-chemprop"],
+        target_column="logd",
+        description="Meta model for LogD prediction",
+        tags=["meta", "logd", "ensemble"],
+    )
+    print(meta.summary())
+
+    # Create an endpoint for the meta model
+    end = meta.to_endpoint(tags=["meta", "logd"])
+    end.set_owner("BW")
+    end.auto_inference()
+
+    # Test loading an existing meta model
+    meta = MetaModel("logd-meta")
+    print(meta.details())
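Note: the confidence-weighted voting in `_validate_and_get_lineage` reduces to normalized inverse-MAE weights. A minimal standalone sketch of that computation, using made-up MAE values for three hypothetical endpoints:

```python
# Sketch of the inverse-MAE weighting above (illustrative MAE values, not real metrics)
mae_scores = {"logd-xgb": 0.40, "logd-pytorch": 0.50, "logd-chemprop": 0.25}

# Lower MAE -> higher weight; weights are normalized to sum to 1.0
inv_mae = {name: 1.0 / mae for name, mae in mae_scores.items()}
total = sum(inv_mae.values())
model_weights = {name: inv / total for name, inv in inv_mae.items()}

print(model_weights)
# {'logd-xgb': 0.2941..., 'logd-pytorch': 0.2352..., 'logd-chemprop': 0.4705...}
```

The strongest child model (lowest MAE) gets roughly twice the vote of the weakest one here; endpoints with no `full_inference` metrics are handled by the fallback paths shown in the diff.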
workbench/api/model.py CHANGED
@@ -10,7 +10,12 @@ from workbench.core.artifacts.artifact import Artifact
 from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework  # noqa: F401
 from workbench.core.transforms.model_to_endpoint.model_to_endpoint import ModelToEndpoint
 from workbench.api.endpoint import Endpoint
-from workbench.utils.model_utils import proximity_model_local, noise_model_local
+from workbench.utils.model_utils import (
+    proximity_model_local,
+    fingerprint_prox_model_local,
+    noise_model_local,
+    cleanlab_model_local,
+)


 class Model(ModelCore):
@@ -83,13 +88,38 @@ class Model(ModelCore):
         end.set_owner(self.get_owner())
         return end

-    def prox_model(self):
+    def prox_model(self, include_all_columns: bool = False):
         """Create a local Proximity Model for this Model

+        Args:
+            include_all_columns (bool): Include all DataFrame columns in results (default: False)
+
+        Returns:
+            FeatureSpaceProximity: A local FeatureSpaceProximity Model
+        """
+        return proximity_model_local(self, include_all_columns=include_all_columns)
+
+    def fp_prox_model(
+        self,
+        include_all_columns: bool = False,
+        radius: int = 2,
+        n_bits: int = 1024,
+        counts: bool = False,
+    ):
+        """Create a local Fingerprint Proximity Model for this Model
+
+        Args:
+            include_all_columns (bool): Include all DataFrame columns in results (default: False)
+            radius (int): Morgan fingerprint radius (default: 2)
+            n_bits (int): Number of bits for the fingerprint (default: 1024)
+            counts (bool): Use count fingerprints instead of binary (default: False)
+
         Returns:
-            Proximity: A local Proximity Model
+            FingerprintProximity: A local FingerprintProximity Model
         """
-        return proximity_model_local(self)
+        return fingerprint_prox_model_local(
+            self, include_all_columns=include_all_columns, radius=radius, n_bits=n_bits, counts=counts
+        )

     def noise_model(self):
         """Create a local Noise Model for this Model
@@ -99,6 +129,15 @@ class Model(ModelCore):
         """
         return noise_model_local(self)

+    def cleanlab_model(self):
+        """Create a CleanLearning model for this Model's training data.
+
+        Returns:
+            CleanLearning: A fitted cleanlab model. Use get_label_issues() to get
+            a DataFrame with id_column, label_quality, predicted_label, given_label, is_label_issue.
+        """
+        return cleanlab_model_local(self)
+

 if __name__ == "__main__":
     """Exercise the Model Class"""
workbench/core/artifacts/endpoint_core.py CHANGED
@@ -330,12 +330,8 @@ class EndpointCore(Artifact):
         self.details()
         return True

-    def auto_inference(self, capture: bool = False) -> pd.DataFrame:
-        """Run inference on the endpoint using FeatureSet data
-
-        Args:
-            capture (bool, optional): Capture the inference results and metrics (default=False)
-        """
+    def auto_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using the test data from the model training view"""

         # Sanity Check that we have a model
         model = ModelCore(self.get_input())
@@ -343,22 +339,40 @@
             self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()

-        # Now get the FeatureSet and make sure it exists
-        fs = FeatureSetCore(model.get_input())
-        if not fs.exists():
-            self.log.error("No FeatureSet found for this endpoint. Returning empty DataFrame.")
+        # Grab the evaluation data from the Model's training view
+        all_df = model.training_view().pull_dataframe()
+        eval_df = all_df[~all_df["training"]]
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "auto_inference")
+
+    def full_inference(self) -> pd.DataFrame:
+        """Run inference on the endpoint using all the data from the model training view"""
+
+        # Sanity Check that we have a model
+        model = ModelCore(self.get_input())
+        if not model.exists():
+            self.log.error("No model found for this endpoint. Returning empty DataFrame.")
             return pd.DataFrame()

-        # Grab the evaluation data from the FeatureSet
-        table = model.training_view().table
-        eval_df = fs.query(f'SELECT * FROM "{table}" where training = FALSE')
-        capture_name = "auto_inference" if capture else None
-        return self.inference(eval_df, capture_name, id_column=fs.id_column)
+        # Grab the full data from the Model's training view
+        eval_df = model.training_view().pull_dataframe()
+
+        # Remove AWS created columns
+        aws_cols = ["write_time", "api_invocation_time", "is_deleted", "event_time"]
+        eval_df = eval_df.drop(columns=aws_cols, errors="ignore")
+
+        # Run inference
+        return self.inference(eval_df, "full_inference")

     def inference(
         self, eval_df: pd.DataFrame, capture_name: str = None, id_column: str = None, drop_error_rows: bool = False
     ) -> pd.DataFrame:
-        """Run inference and compute performance metrics with optional capture
+        """Run inference on the Endpoint using the provided DataFrame

         Args:
             eval_df (pd.DataFrame): DataFrame to run predictions on (must have superset of features)
@@ -440,11 +454,14 @@
             # Drop rows with NaN target values for metrics/plots
             target_df = prediction_df.dropna(subset=[target])

+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
             # Compute per-target metrics
             if model.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-                target_metrics = self.regression_metrics(target, target_df)
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
             elif model.model_type == ModelType.CLASSIFIER:
-                target_metrics = self.classification_metrics(target, target_df)
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
             else:
                 target_metrics = pd.DataFrame()

@@ -476,8 +493,8 @@
                 id_column,
             )

-        # For UQ Models we also capture the uncertainty metrics
-        if model.model_type == ModelType.UQ_REGRESSOR:
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in prediction_df.columns:
             metrics = uq_metrics(prediction_df, primary_target)
             self.param_store.upsert(f"/workbench/models/{model.name}/inference/{capture_name}", metrics)

@@ -525,22 +542,20 @@
         fs = FeatureSetCore(model.get_input())
         id_column = fs.id_column

-        # For UQ models, get UQ columns from training CV results and compute metrics
-        # Note: XGBoost training now saves all UQ columns (q_*, confidence, prediction_std)
-        additional_columns = []
-        if model_type == ModelType.UQ_REGRESSOR:
-            uq_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
-            if uq_columns:
-                additional_columns = uq_columns
-                self.log.info(f"UQ columns from training: {', '.join(uq_columns)}")
-            primary_target = targets[0] if isinstance(targets, list) else targets
-            metrics = uq_metrics(out_of_fold_df, primary_target)
-            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
-
         # Normalize targets to a list for iteration
         target_list = targets if isinstance(targets, list) else [targets]
         primary_target = target_list[0]

+        # Collect UQ columns (q_*, confidence) for additional tracking
+        additional_columns = [col for col in out_of_fold_df.columns if col.startswith("q_") or col == "confidence"]
+        if additional_columns:
+            self.log.info(f"UQ columns from training: {', '.join(additional_columns)}")
+
+        # Capture uncertainty metrics if prediction_std is available (UQ, ChemProp, etc.)
+        if "prediction_std" in out_of_fold_df.columns:
+            metrics = uq_metrics(out_of_fold_df, primary_target)
+            self.param_store.upsert(f"/workbench/models/{model.name}/inference/full_cross_fold", metrics)
+
         # For single-target models (99% of cases), just save as "full_cross_fold"
         # For multi-target models, save each as cv_{target} plus primary as "full_cross_fold"
@@ -549,11 +564,14 @@
             # Drop rows with NaN target values for metrics/plots
             target_df = out_of_fold_df.dropna(subset=[target])

+            # For multi-target models, prediction column is {target}_pred, otherwise "prediction"
+            pred_col = f"{target}_pred" if is_multi_target else "prediction"
+
             # Compute per-target metrics
             if model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-                target_metrics = self.regression_metrics(target, target_df)
+                target_metrics = self.regression_metrics(target, target_df, prediction_col=pred_col)
             elif model_type == ModelType.CLASSIFIER:
-                target_metrics = self.classification_metrics(target, target_df)
+                target_metrics = self.classification_metrics(target, target_df, prediction_col=pred_col)
             else:
                 target_metrics = pd.DataFrame()

@@ -867,75 +885,39 @@
             target (str): Target column name
             id_column (str, optional): Name of the ID column
         """
-        # Start with ID column if present
+        cols = pred_results_df.columns
+
+        # Build output columns: id, target, prediction, prediction_std, UQ columns, proba columns
         output_columns = []
-        if id_column and id_column in pred_results_df.columns:
+        if id_column and id_column in cols:
             output_columns.append(id_column)
-
-        # Add target column if present
-        if target and target in pred_results_df.columns:
+        if target and target in cols:
             output_columns.append(target)

-        # Build the output DataFrame
-        output_df = pred_results_df[output_columns].copy() if output_columns else pd.DataFrame()
-
-        # For multi-task: map {target}_pred -> prediction, {target}_pred_std -> prediction_std
-        # For single-task: just grab prediction and prediction_std columns directly
-        pred_col = f"{target}_pred"
-        std_col = f"{target}_pred_std"
-        if pred_col in pred_results_df.columns:
-            # Multi-task columns exist
-            output_df["prediction"] = pred_results_df[pred_col]
-            if std_col in pred_results_df.columns:
-                output_df["prediction_std"] = pred_results_df[std_col]
-        else:
-            # Single-task: grab standard prediction columns
-            for col in ["prediction", "prediction_std"]:
-                if col in pred_results_df.columns:
-                    output_df[col] = pred_results_df[col]
-        # Also grab any _proba columns and UQ columns
-        for col in pred_results_df.columns:
-            if col.endswith("_proba") or col.startswith("q_") or col == "confidence":
-                output_df[col] = pred_results_df[col]
+        output_columns += [c for c in ["prediction", "prediction_std"] if c in cols]
+
+        # Add UQ columns (q_*, confidence) and proba columns
+        output_columns += [c for c in cols if c.startswith("q_") or c == "confidence" or c.endswith("_proba")]

         # Write the predictions to S3
         output_file = f"{inference_capture_path}/inference_predictions.csv"
         self.log.info(f"Writing predictions to {output_file}")
-        wr.s3.to_csv(output_df, output_file, index=False)
+        wr.s3.to_csv(pred_results_df[output_columns], output_file, index=False)

-    def regression_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def regression_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint
+
         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")
+
         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-
-        # Sanity Check the prediction DataFrame
-        if prediction_df.empty:
-            self.log.warning("No predictions were made. Returning empty DataFrame.")
-            return pd.DataFrame()
-
-        # Check for prediction column
-        if "prediction" not in prediction_df.columns:
-            self.log.warning("No 'prediction' column found in DataFrame")
-            return pd.DataFrame()
-
-        # Check for NaN values in target or prediction columns
-        if prediction_df[target_column].isnull().any() or prediction_df["prediction"].isnull().any():
-            num_nan_target = prediction_df[target_column].isnull().sum()
-            num_nan_prediction = prediction_df["prediction"].isnull().sum()
-            self.log.warning(f"NaNs Found: {target_column} {num_nan_target} and prediction: {num_nan_prediction}.")
-            self.log.warning("Dropping NaN rows for metric computation.")
-            prediction_df = prediction_df.dropna(subset=[target_column, "prediction"])
-
-        # Compute the metrics using shared utilities
-        try:
-            return compute_regression_metrics(prediction_df, target_column)
-        except Exception as e:
-            self.log.warning(f"Error computing regression metrics: {str(e)}")
-            return pd.DataFrame()
+        return compute_regression_metrics(prediction_df, target_column, prediction_col)

     def residuals(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Add the residuals to the prediction DataFrame
@@ -965,58 +947,22 @@

         return prediction_df

-    @staticmethod
-    def validate_proba_columns(prediction_df: pd.DataFrame, class_labels: list, guessing: bool = False):
-        """Ensure probability columns are correctly aligned with class labels
-
-        Args:
-            prediction_df (pd.DataFrame): DataFrame with the prediction results
-            class_labels (list): List of class labels
-            guessing (bool, optional): Whether we're guessing the class labels. Defaults to False.
-        """
-        proba_columns = [col.replace("_proba", "") for col in prediction_df.columns if col.endswith("_proba")]
-
-        if sorted(class_labels) != sorted(proba_columns):
-            if guessing:
-                raise ValueError(f"_proba columns {proba_columns} != GUESSED class_labels {class_labels}!")
-            else:
-                raise ValueError(f"_proba columns {proba_columns} != class_labels {class_labels}!")
-
-    def classification_metrics(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
+    def classification_metrics(
+        self, target_column: str, prediction_df: pd.DataFrame, prediction_col: str = "prediction"
+    ) -> pd.DataFrame:
         """Compute the performance metrics for this Endpoint

         Args:
             target_column (str): Name of the target column
             prediction_df (pd.DataFrame): DataFrame with the prediction results
+            prediction_col (str): Name of the prediction column (default: "prediction")

         Returns:
             pd.DataFrame: DataFrame with the performance metrics
         """
-        # Check for prediction column
-        if "prediction" not in prediction_df.columns:
-            self.log.warning("No 'prediction' column found in DataFrame")
-            return pd.DataFrame()
-
-        # Drop rows with NaN predictions (can't compute metrics on missing predictions)
-        nan_mask = prediction_df["prediction"].isna()
-        if nan_mask.any():
-            n_nan = nan_mask.sum()
-            self.log.warning(f"Dropping {n_nan} rows with NaN predictions for metrics calculation")
-            prediction_df = prediction_df[~nan_mask].copy()
-
-        # Get the class labels from the model
+        # Get class labels from the model (metrics_utils will infer if None)
         class_labels = ModelCore(self.model_name).class_labels()
-        if class_labels is None:
-            self.log.warning(
-                "Class labels not found in the model. Guessing class labels from the prediction DataFrame."
-            )
-            class_labels = prediction_df[target_column].unique().tolist()
-            self.validate_proba_columns(prediction_df, class_labels, guessing=True)
-        else:
-            self.validate_proba_columns(prediction_df, class_labels)
-
-        # Compute the metrics using shared utilities (returns per-class + 'all' row)
-        return compute_classification_metrics(prediction_df, target_column, class_labels)
+        return compute_classification_metrics(prediction_df, target_column, class_labels, prediction_col)

     def generate_confusion_matrix(self, target_column: str, prediction_df: pd.DataFrame) -> pd.DataFrame:
         """Compute the confusion matrix for this Endpoint
workbench/core/artifacts/feature_set_core.py CHANGED
@@ -574,7 +574,7 @@ class FeatureSetCore(Artifact):

         TrainingView.create_with_sql(self, sql_query=custom_sql, id_column=self.id_column)

-    @deprecated(version=0.9)
+    @deprecated(version="0.9")
     def set_training_filter(self, filter_expression: Optional[str] = None):
         """Set a filter expression for the training view for this FeatureSet
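The feature_set_core change simply quotes the decorator's version argument. A string is the safer type here: float literals lose trailing zeros (`0.10` evaluates to `0.1`) and cannot be formatted or compared as version strings. An illustrative decorator shape (an assumption for demonstration, not Workbench's actual implementation):

```python
import functools
import warnings

def deprecated(version: str):
    """Illustrative sketch: warn that the wrapped function goes away in `version`."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                f"{func.__name__} is deprecated and will be removed in version {version}",
                DeprecationWarning,
                stacklevel=2,
            )
            return func(*args, **kwargs)
        return wrapper
    return decorator
```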