workbench 0.8.202-py3-none-any.whl → 0.8.220-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of workbench might be problematic.

Files changed (84)
  1. workbench/algorithms/dataframe/compound_dataset_overlap.py +321 -0
  2. workbench/algorithms/dataframe/feature_space_proximity.py +168 -75
  3. workbench/algorithms/dataframe/fingerprint_proximity.py +421 -85
  4. workbench/algorithms/dataframe/projection_2d.py +44 -21
  5. workbench/algorithms/dataframe/proximity.py +78 -150
  6. workbench/algorithms/graph/light/proximity_graph.py +5 -5
  7. workbench/algorithms/models/cleanlab_model.py +382 -0
  8. workbench/algorithms/models/noise_model.py +388 -0
  9. workbench/algorithms/sql/outliers.py +3 -3
  10. workbench/api/__init__.py +3 -0
  11. workbench/api/df_store.py +17 -108
  12. workbench/api/endpoint.py +13 -11
  13. workbench/api/feature_set.py +111 -8
  14. workbench/api/meta_model.py +289 -0
  15. workbench/api/model.py +45 -12
  16. workbench/api/parameter_store.py +3 -52
  17. workbench/cached/cached_model.py +4 -4
  18. workbench/core/artifacts/artifact.py +5 -5
  19. workbench/core/artifacts/df_store_core.py +114 -0
  20. workbench/core/artifacts/endpoint_core.py +228 -237
  21. workbench/core/artifacts/feature_set_core.py +185 -230
  22. workbench/core/artifacts/model_core.py +34 -26
  23. workbench/core/artifacts/parameter_store_core.py +98 -0
  24. workbench/core/pipelines/pipeline_executor.py +1 -1
  25. workbench/core/transforms/features_to_model/features_to_model.py +22 -10
  26. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +41 -10
  27. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  28. workbench/model_script_utils/model_script_utils.py +339 -0
  29. workbench/model_script_utils/pytorch_utils.py +405 -0
  30. workbench/model_script_utils/uq_harness.py +278 -0
  31. workbench/model_scripts/chemprop/chemprop.template +428 -631
  32. workbench/model_scripts/chemprop/generated_model_script.py +432 -635
  33. workbench/model_scripts/chemprop/model_script_utils.py +339 -0
  34. workbench/model_scripts/chemprop/requirements.txt +2 -10
  35. workbench/model_scripts/custom_models/chem_info/fingerprints.py +87 -46
  36. workbench/model_scripts/custom_models/proximity/feature_space_proximity.py +194 -0
  37. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +6 -6
  38. workbench/model_scripts/custom_models/uq_models/feature_space_proximity.py +194 -0
  39. workbench/model_scripts/meta_model/generated_model_script.py +209 -0
  40. workbench/model_scripts/meta_model/meta_model.template +209 -0
  41. workbench/model_scripts/pytorch_model/generated_model_script.py +374 -613
  42. workbench/model_scripts/pytorch_model/model_script_utils.py +339 -0
  43. workbench/model_scripts/pytorch_model/pytorch.template +370 -609
  44. workbench/model_scripts/pytorch_model/pytorch_utils.py +405 -0
  45. workbench/model_scripts/pytorch_model/requirements.txt +1 -1
  46. workbench/model_scripts/pytorch_model/uq_harness.py +278 -0
  47. workbench/model_scripts/script_generation.py +6 -5
  48. workbench/model_scripts/uq_models/generated_model_script.py +65 -422
  49. workbench/model_scripts/xgb_model/generated_model_script.py +372 -395
  50. workbench/model_scripts/xgb_model/model_script_utils.py +339 -0
  51. workbench/model_scripts/xgb_model/uq_harness.py +278 -0
  52. workbench/model_scripts/xgb_model/xgb_model.template +366 -396
  53. workbench/repl/workbench_shell.py +0 -5
  54. workbench/resources/open_source_api.key +1 -1
  55. workbench/scripts/endpoint_test.py +2 -2
  56. workbench/scripts/meta_model_sim.py +35 -0
  57. workbench/scripts/training_test.py +85 -0
  58. workbench/utils/chem_utils/fingerprints.py +87 -46
  59. workbench/utils/chem_utils/projections.py +16 -6
  60. workbench/utils/chemprop_utils.py +36 -655
  61. workbench/utils/meta_model_simulator.py +499 -0
  62. workbench/utils/metrics_utils.py +256 -0
  63. workbench/utils/model_utils.py +192 -54
  64. workbench/utils/pytorch_utils.py +33 -472
  65. workbench/utils/shap_utils.py +1 -55
  66. workbench/utils/xgboost_local_crossfold.py +267 -0
  67. workbench/utils/xgboost_model_utils.py +49 -356
  68. workbench/web_interface/components/model_plot.py +7 -1
  69. workbench/web_interface/components/plugins/model_details.py +30 -68
  70. workbench/web_interface/components/plugins/scatter_plot.py +4 -8
  71. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/METADATA +6 -5
  72. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/RECORD +76 -60
  73. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/entry_points.txt +2 -0
  74. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  75. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
  76. workbench/model_scripts/custom_models/meta_endpoints/example.py +0 -53
  77. workbench/model_scripts/custom_models/proximity/proximity.py +0 -410
  78. workbench/model_scripts/custom_models/uq_models/meta_uq.template +0 -377
  79. workbench/model_scripts/custom_models/uq_models/proximity.py +0 -410
  80. workbench/model_scripts/uq_models/mapie.template +0 -605
  81. workbench/model_scripts/uq_models/requirements.txt +0 -1
  82. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/WHEEL +0 -0
  83. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/licenses/LICENSE +0 -0
  84. {workbench-0.8.202.dist-info → workbench-0.8.220.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/model_core.py
@@ -21,7 +21,7 @@ from workbench.utils.aws_utils import newest_path, pull_s3_data
 from workbench.utils.s3_utils import compute_s3_object_hash
 from workbench.utils.shap_utils import shap_values_data, shap_feature_importance
 from workbench.utils.deprecated_utils import deprecated
-from workbench.utils.model_utils import proximity_model
+from workbench.utils.model_utils import published_proximity_model, get_model_hyperparameters


 class ModelType(Enum):
@@ -44,9 +44,10 @@ class ModelFramework(Enum):
     SKLEARN = "sklearn"
     XGBOOST = "xgboost"
     LIGHTGBM = "lightgbm"
-    PYTORCH_TABULAR = "pytorch_tabular"
+    PYTORCH = "pytorch"
     CHEMPROP = "chemprop"
     TRANSFORMER = "transformer"
+    META = "meta"
     UNKNOWN = "unknown"


@@ -62,7 +63,8 @@ class ModelImages:
         "inference": "py312-general-ml-inference",
         "pytorch_training": "py312-pytorch-training",
         "pytorch_inference": "py312-pytorch-inference",
-        "meta-endpoint": "py312-meta-endpoint",
+        "meta_training": "py312-meta-training",
+        "meta_inference": "py312-meta-inference",
     }

     @classmethod
@@ -263,21 +265,25 @@ class ModelCore(Artifact):
         else:
             self.log.important(f"No inference data found for {self.model_name}!")

-    def get_inference_metrics(self, capture_name: str = "latest") -> Union[pd.DataFrame, None]:
+    def get_inference_metrics(self, capture_name: str = "auto") -> Union[pd.DataFrame, None]:
         """Retrieve the inference performance metrics for this model

         Args:
-            capture_name (str, optional): Specific capture_name or "training" (default: "latest")
+            capture_name (str, optional): Specific capture_name (default: "auto")
         Returns:
             pd.DataFrame: DataFrame of the Model Metrics

         Note:
-            If a capture_name isn't specified this will try to return something reasonable
+            If a capture_name isn't specified this will try to the 'first' available metrics
         """
         # Try to get the auto_capture 'training_holdout' or the training
-        if capture_name == "latest":
-            metrics_df = self.get_inference_metrics("auto_inference")
-            return metrics_df if metrics_df is not None else self.get_inference_metrics("model_training")
+        if capture_name == "auto":
+            metric_list = self.list_inference_runs()
+            if metric_list:
+                return self.get_inference_metrics(metric_list[0])
+            else:
+                self.log.warning(f"No performance metrics found for {self.model_name}!")
+                return None

         # Grab the metrics captured during model training (could return None)
         if capture_name == "model_training":
@@ -299,11 +305,11 @@ class ModelCore(Artifact):
         self.log.warning(f"Performance metrics {capture_name} not found for {self.model_name}!")
         return None

-    def confusion_matrix(self, capture_name: str = "latest") -> Union[pd.DataFrame, None]:
+    def confusion_matrix(self, capture_name: str = "auto") -> Union[pd.DataFrame, None]:
         """Retrieve the confusion_matrix for this model

         Args:
-            capture_name (str, optional): Specific capture_name or "training" (default: "latest")
+            capture_name (str, optional): Specific capture_name or "training" (default: "auto")
         Returns:
             pd.DataFrame: DataFrame of the Confusion Matrix (might be None)
         """
@@ -315,7 +321,7 @@ class ModelCore(Artifact):
             raise ValueError(error_msg)

         # Grab the metrics from the Workbench Metadata (try inference first, then training)
-        if capture_name == "latest":
+        if capture_name == "auto":
             cm = self.confusion_matrix("auto_inference")
             return cm if cm is not None else self.confusion_matrix("model_training")

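A minimal usage sketch of the new "auto" default above, assuming a Model handle from workbench.api (the model name here is illustrative, not taken from this diff):

```python
from workbench.api import Model  # assumed public API wrapper around ModelCore

model = Model("abalone-regression")  # hypothetical model name

# With the new default, "auto" resolves to the first available inference run
metrics_df = model.get_inference_metrics()

# A specific capture can still be requested by name
training_metrics = model.get_inference_metrics("model_training")
```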
@@ -537,6 +543,17 @@ class ModelCore(Artifact):
         else:
             self.log.error(f"Model {self.model_name} is not a classifier!")

+    def summary(self) -> dict:
+        """Summary information about this Model
+
+        Returns:
+            dict: Dictionary of summary information about this Model
+        """
+        self.log.info("Computing Model Summary...")
+        summary = super().summary()
+        summary["hyperparameters"] = get_model_hyperparameters(self)
+        return summary
+
     def details(self) -> dict:
         """Additional Details about this Model

@@ -561,6 +578,7 @@ class ModelCore(Artifact):
         details["status"] = self.latest_model["ModelPackageStatus"]
         details["approval_status"] = self.latest_model.get("ModelApprovalStatus", "unknown")
         details["image"] = self.container_image().split("/")[-1]  # Shorten the image uri
+        details["hyperparameters"] = get_model_hyperparameters(self)

         # Grab the inference and container info
         inference_spec = self.latest_model["InferenceSpecification"]
@@ -571,16 +589,6 @@ class ModelCore(Artifact):
         details["transform_types"] = inference_spec["SupportedTransformInstanceTypes"]
         details["content_types"] = inference_spec["SupportedContentTypes"]
         details["response_types"] = inference_spec["SupportedResponseMIMETypes"]
-        details["model_metrics"] = self.get_inference_metrics()
-        if self.model_type == ModelType.CLASSIFIER:
-            details["confusion_matrix"] = self.confusion_matrix()
-            details["predictions"] = None
-        elif self.model_type in [ModelType.REGRESSOR, ModelType.UQ_REGRESSOR, ModelType.ENSEMBLE_REGRESSOR]:
-            details["confusion_matrix"] = None
-            details["predictions"] = self.get_inference_predictions()
-        else:
-            details["confusion_matrix"] = None
-            details["predictions"] = None

         # Grab the inference metadata
         details["inference_meta"] = self.get_inference_metadata()
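With the lines above removed, details() no longer bundles metrics, confusion matrix, or predictions; a hedged sketch of fetching them through the dedicated accessors instead (model name and the workbench.api.Model import are assumptions):

```python
from workbench.api import Model                             # assumed public API wrapper
from workbench.core.artifacts.model_core import ModelType   # enum defined in this file

model = Model("wine-classification")  # hypothetical model name

details = model.details()                # status, image, hyperparameters, inference meta, ...
metrics = model.get_inference_metrics()  # "auto": first available inference run

if model.model_type == ModelType.CLASSIFIER:
    cm = model.confusion_matrix()                  # classifier-only artifact
else:
    preds = model.get_inference_predictions()      # regressor-style predictions
```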
@@ -869,7 +877,7 @@ class ModelCore(Artifact):
             return self.df_store.get(f"/workbench/models/{self.name}/shap_data")
         else:
             # Loop over the SHAP data and return a dict of DataFrames
-            shap_dfs = self.df_store.list_subfiles(f"/workbench/models/{self.name}/shap_data")
+            shap_dfs = self.df_store.list(f"/workbench/models/{self.name}/shap_data")
             shap_data = {}
             for df_location in shap_dfs:
                 key = df_location.split("/")[-1]
@@ -888,19 +896,19 @@ class ModelCore(Artifact):
         except (KeyError, IndexError, TypeError):
             return None

-    def publish_prox_model(self, prox_model_name: str = None, track_columns: list = None):
+    def publish_prox_model(self, prox_model_name: str = None, include_all_columns: bool = False):
         """Create and publish a Proximity Model for this Model

         Args:
             prox_model_name (str, optional): Name of the Proximity Model (if not specified, a name will be generated)
-            track_columns (list, optional): List of columns to track in the Proximity Model.
+            include_all_columns (bool): Include all DataFrame columns in results (default: False)

         Returns:
             Model: The published Proximity Model
         """
         if prox_model_name is None:
             prox_model_name = self.model_name + "-prox"
-        return proximity_model(self, prox_model_name, track_columns=track_columns)
+        return published_proximity_model(self, prox_model_name, include_all_columns=include_all_columns)

     def delete(self):
         """Delete the Model Packages and the Model Group"""
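A short usage sketch of the updated publish_prox_model() signature above (model and proximity-model names are illustrative assumptions):

```python
from workbench.api import Model  # assumed public API wrapper

model = Model("aqsol-regression")  # hypothetical model name

# Default: proximity results carry only the standard columns
prox = model.publish_prox_model()  # name defaults to "aqsol-regression-prox"

# include_all_columns=True keeps every DataFrame column in the proximity results
prox_full = model.publish_prox_model("aqsol-prox-full", include_all_columns=True)
```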
workbench/core/artifacts/parameter_store_core.py (new file)
@@ -0,0 +1,98 @@
+"""ParameterStoreCore: Manages Workbench parameters in a Cloud Based Parameter Store."""
+
+import logging
+
+# Workbench Imports
+from workbench.core.cloud_platform.aws.aws_account_clamp import AWSAccountClamp
+
+# Workbench Bridges Import
+from workbench_bridges.api import ParameterStore as BridgesParameterStore
+
+
+class ParameterStoreCore(BridgesParameterStore):
+    """ParameterStoreCore: Manages Workbench parameters in a Cloud Based Parameter Store.
+
+    Common Usage:
+        ```python
+        params = ParameterStoreCore()
+
+        # List Parameters
+        params.list()
+
+        ['/workbench/abalone_info',
+         '/workbench/my_data',
+         '/workbench/test',
+         '/workbench/pipelines/my_pipeline']
+
+        # Add Key
+        params.upsert("key", "value")
+        value = params.get("key")
+
+        # Add any data (lists, dictionaries, etc..)
+        my_data = {"key": "value", "number": 4.2, "list": [1,2,3]}
+        params.upsert("my_data", my_data)
+
+        # Retrieve data
+        return_value = params.get("my_data")
+        pprint(return_value)
+
+        {'key': 'value', 'list': [1, 2, 3], 'number': 4.2}
+
+        # Delete parameters
+        param_store.delete("my_data")
+        ```
+    """
+
+    def __init__(self):
+        """ParameterStoreCore Init Method"""
+        session = AWSAccountClamp().boto3_session
+
+        # Initialize parent with workbench config
+        super().__init__(boto3_session=session)
+        self.log = logging.getLogger("workbench")
+
+
+if __name__ == "__main__":
+    """Exercise the ParameterStoreCore Class"""
+
+    # Create a ParameterStoreCore manager
+    param_store = ParameterStoreCore()
+
+    # List the parameters
+    print("Listing Parameters...")
+    print(param_store.list())
+
+    # Add a new parameter
+    param_store.upsert("/workbench/test", "value")
+
+    # Get the parameter
+    print(f"Getting parameter 'test': {param_store.get('/workbench/test')}")
+
+    # Add a dictionary as a parameter
+    sample_dict = {"key": "str_value", "awesome_value": 4.2}
+    param_store.upsert("/workbench/my_data", sample_dict)
+
+    # Retrieve the parameter as a dictionary
+    retrieved_value = param_store.get("/workbench/my_data")
+    print("Retrieved value:", retrieved_value)
+
+    # List the parameters
+    print("Listing Parameters...")
+    print(param_store.list())
+
+    # List the parameters with a prefix
+    print("Listing Parameters with prefix '/workbench':")
+    print(param_store.list("/workbench"))
+
+    # Delete the parameters
+    param_store.delete("/workbench/test")
+    param_store.delete("/workbench/my_data")
+
+    # Out of scope tests
+    param_store.upsert("test", "value")
+    param_store.delete("test")
+
+    # Recursive delete test
+    param_store.upsert("/workbench/test/test1", "value1")
+    param_store.upsert("/workbench/test/test2", "value2")
+    param_store.delete_recursive("workbench/test/")
workbench/core/pipelines/pipeline_executor.py
@@ -123,7 +123,7 @@ class PipelineExecutor:
         if "model" in workbench_objects and (not subset or "endpoint" in subset):
             workbench_objects["model"].to_endpoint(**kwargs)
             endpoint = Endpoint(kwargs["name"])
-            endpoint.auto_inference(capture=True)
+            endpoint.auto_inference()

         # Found something weird
         else:
workbench/core/transforms/features_to_model/features_to_model.py
@@ -1,6 +1,7 @@
 """FeaturesToModel: Train/Create a Model from a Feature Set"""

 from pathlib import Path
+from typing import Union
 from sagemaker.estimator import Estimator
 import awswrangler as wr
 from datetime import datetime, timezone
@@ -83,12 +84,17 @@ class FeaturesToModel(Transform):
         self.inference_arch = inference_arch

     def transform_impl(
-        self, target_column: str, description: str = None, feature_list: list = None, train_all_data=False, **kwargs
+        self,
+        target_column: Union[str, list[str]],
+        description: str = None,
+        feature_list: list = None,
+        train_all_data=False,
+        **kwargs,
     ):
         """Generic Features to Model: Note you should create a new class and inherit from
         this one to include specific logic for your Feature Set/Model
         Args:
-            target_column (str): Column name of the target variable
+            target_column (str or list[str]): Column name(s) of the target variable(s)
             description (str): Description of the model (optional)
             feature_list (list[str]): A list of columns for the features (default None, will try to guess)
             train_all_data (bool): Train on ALL (100%) of the data (default False)
@@ -105,9 +111,11 @@ class FeaturesToModel(Transform):
         s3_training_path = feature_set.create_s3_training_data()
         self.log.info(f"Created new training data {s3_training_path}...")

-        # Report the target column
+        # Report the target column(s)
         self.target_column = target_column
-        self.log.info(f"Target column: {self.target_column}")
+        # Normalize target_column to a list for internal use
+        target_list = [target_column] if isinstance(target_column, str) else (target_column or [])
+        self.log.info(f"Target column(s): {self.target_column}")

         # Did they specify a feature list?
         if feature_list:
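The str-or-list normalization above can be read as the following standalone helper (a sketch for clarity, not code from the package):

```python
from typing import Union


def normalize_targets(target_column: Union[str, list[str], None]) -> list[str]:
    """Return target_column as a list: wrap a single name, pass lists through, map None to []."""
    return [target_column] if isinstance(target_column, str) else (target_column or [])


assert normalize_targets("solubility") == ["solubility"]
assert normalize_targets(["logs", "logd"]) == ["logs", "logd"]
assert normalize_targets(None) == []
```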
@@ -134,7 +142,7 @@ class FeaturesToModel(Transform):
                 "is_deleted",
                 "event_time",
                 "training",
-            ] + [self.target_column]
+            ] + target_list
             feature_list = [c for c in all_columns if c not in filter_list]

         # AWS Feature Store has 3 user column types (String, Integral, Fractional)
@@ -157,12 +165,14 @@ class FeaturesToModel(Transform):
         self.log.important(f"Feature List for Modeling: {self.model_feature_list}")

         # Set up our parameters for the model script
+        # ChemProp expects target_column as a list; other templates expect a string
+        target_for_template = target_list if self.model_framework == ModelFramework.CHEMPROP else self.target_column
         template_params = {
             "model_imports": self.model_import_str,
             "model_type": self.model_type,
             "model_framework": self.model_framework,
             "model_class": self.model_class,
-            "target_column": self.target_column,
+            "target_column": target_for_template,
             "feature_list": self.model_feature_list,
             "compressed_features": feature_set.get_compressed_features(),
             "model_metrics_s3_path": self.model_training_root,
@@ -202,11 +212,13 @@ class FeaturesToModel(Transform):
         # Metric Definitions for Classification
         elif self.model_type == ModelType.CLASSIFIER:
             # We need to get creative with the Classification Metrics
+            # Note: Classification only supports single target
+            class_target = target_list[0] if target_list else self.target_column

             # Grab all the target column class values (class labels)
             table = feature_set.data_source.table
-            self.class_labels = feature_set.query(f'select DISTINCT {self.target_column} FROM "{table}"')[
-                self.target_column
+            self.class_labels = feature_set.query(f'select DISTINCT {class_target} FROM "{table}"')[
+                class_target
             ].to_list()

             # Sanity check on the targets
@@ -216,7 +228,7 @@ class FeaturesToModel(Transform):
                 raise ValueError(msg)

             # Dynamically create the metric definitions
-            metrics = ["precision", "recall", "f1"]
+            metrics = ["precision", "recall", "f1", "support"]
             metric_definitions = []
             for t in self.class_labels:
                 for m in metrics:
@@ -242,7 +254,7 @@ class FeaturesToModel(Transform):
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)

         # Use GPU instance for ChemProp/PyTorch, CPU for others
-        if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR]:
+        if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH]:
            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr
            self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
        else:
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py
@@ -1,6 +1,7 @@
 """ModelToEndpoint: Deploy an Endpoint for a Model"""

 import time
+from botocore.exceptions import ClientError
 from sagemaker import ModelPackage
 from sagemaker.serializers import CSVSerializer
 from sagemaker.deserializers import CSVDeserializer
@@ -102,10 +103,21 @@ class ModelToEndpoint(Transform):
         # Is this a serverless deployment?
         serverless_config = None
         if self.serverless:
+            # For PyTorch or ChemProp we need at least 4GB of memory
+            from workbench.api import ModelFramework
+
+            self.log.info(f"Model Framework: {workbench_model.model_framework}")
+            if workbench_model.model_framework in [ModelFramework.PYTORCH, ModelFramework.CHEMPROP]:
+                if mem_size < 4096:
+                    self.log.important(
+                        f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)"
+                    )
+                    mem_size = 4096
             serverless_config = ServerlessInferenceConfig(
                 memory_size_in_mb=mem_size,
                 max_concurrency=max_concurrency,
             )
+            self.log.important(f"Serverless Config: Memory={mem_size}MB, MaxConcurrency={max_concurrency}")

         # Configure data capture if requested (and not serverless)
         data_capture_config = None
@@ -126,16 +138,35 @@ class ModelToEndpoint(Transform):

         # Deploy the Endpoint
         self.log.important(f"Deploying the Endpoint {self.output_name}...")
-        model_package.deploy(
-            initial_instance_count=1,
-            instance_type=self.instance_type,
-            serverless_inference_config=serverless_config,
-            endpoint_name=self.output_name,
-            serializer=CSVSerializer(),
-            deserializer=CSVDeserializer(),
-            data_capture_config=data_capture_config,
-            tags=aws_tags,
-        )
+        try:
+            model_package.deploy(
+                initial_instance_count=1,
+                instance_type=self.instance_type,
+                serverless_inference_config=serverless_config,
+                endpoint_name=self.output_name,
+                serializer=CSVSerializer(),
+                deserializer=CSVDeserializer(),
+                data_capture_config=data_capture_config,
+                tags=aws_tags,
+            )
+        except ClientError as e:
+            # Check if this is the "endpoint config already exists" error
+            if "Cannot create already existing endpoint configuration" in str(e):
+                self.log.warning("Endpoint config already exists, deleting and retrying...")
+                self.sm_client.delete_endpoint_config(EndpointConfigName=self.output_name)
+                # Retry the deploy
+                model_package.deploy(
+                    initial_instance_count=1,
+                    instance_type=self.instance_type,
+                    serverless_inference_config=serverless_config,
+                    endpoint_name=self.output_name,
+                    serializer=CSVSerializer(),
+                    deserializer=CSVDeserializer(),
+                    data_capture_config=data_capture_config,
+                    tags=aws_tags,
+                )
+            else:
+                raise

     def post_transform(self, **kwargs):
         """Post-Transform: Calling onboard() for the Endpoint"""
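At the API level this deploy path is normally reached through Model.to_endpoint(); a hedged sketch (model and endpoint names are illustrative; the "name" keyword mirrors the PipelineExecutor hunk earlier in this diff):

```python
from workbench.api import Endpoint, Model  # assumed public API wrappers

model = Model("aqsol-regression")                # hypothetical model name
model.to_endpoint(name="aqsol-regression-end")   # deploy; kwarg usage assumed from PipelineExecutor

endpoint = Endpoint("aqsol-regression-end")
endpoint.auto_inference()  # capture-less call, matching the PipelineExecutor change
```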
workbench/core/transforms/pandas_transforms/pandas_to_features.py
@@ -68,6 +68,15 @@ class PandasToFeatures(Transform):
         self.output_df = input_df.copy()
         self.one_hot_columns = one_hot_columns or []

+        # Warn about known AWS Iceberg bug with event_time_column
+        if event_time_column is not None:
+            self.log.warning(
+                f"event_time_column='{event_time_column}' specified. Note: AWS has a known bug with "
+                "Iceberg FeatureGroups where varying event times across multiple days can cause "
+                "duplicate rows in the offline store. Setting event_time_column=None."
+            )
+            self.event_time_column = None
+
         # Now Prepare the DataFrame for its journey into an AWS FeatureGroup
         self.prep_dataframe()

@@ -400,7 +409,7 @@ class PandasToFeatures(Transform):

         # Set Hold Out Ids (if we got them during creation)
         if self.incoming_hold_out_ids:
-            self.output_feature_set.set_training_holdouts(self.id_column, self.incoming_hold_out_ids)
+            self.output_feature_set.set_training_holdouts(self.incoming_hold_out_ids)

     def ensure_feature_group_created(self, feature_group):
         status = feature_group.describe().get("FeatureGroupStatus")
@@ -462,7 +471,7 @@ if __name__ == "__main__":

     # Create my DF to Feature Set Transform (with one-hot encoding)
     df_to_features = PandasToFeatures("test_features")
-    df_to_features.set_input(data_df, id_column="id", one_hot_columns=["food"])
+    df_to_features.set_input(data_df, id_column="id", event_time_column="date", one_hot_columns=["food"])
     df_to_features.set_output_tags(["test", "small"])
     df_to_features.transform()