workbench 0.8.201__py3-none-any.whl → 0.8.204__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. workbench/api/df_store.py +17 -108
  2. workbench/api/feature_set.py +41 -7
  3. workbench/api/parameter_store.py +3 -52
  4. workbench/core/artifacts/artifact.py +5 -5
  5. workbench/core/artifacts/df_store_core.py +114 -0
  6. workbench/core/artifacts/endpoint_core.py +184 -75
  7. workbench/core/artifacts/model_core.py +11 -7
  8. workbench/core/artifacts/parameter_store_core.py +98 -0
  9. workbench/core/transforms/features_to_model/features_to_model.py +27 -13
  10. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +11 -0
  11. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  12. workbench/model_scripts/chemprop/chemprop.template +312 -293
  13. workbench/model_scripts/chemprop/generated_model_script.py +316 -297
  14. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
  15. workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
  16. workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
  17. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
  18. workbench/model_scripts/pytorch_model/generated_model_script.py +278 -128
  19. workbench/model_scripts/pytorch_model/pytorch.template +273 -123
  20. workbench/model_scripts/uq_models/generated_model_script.py +20 -11
  21. workbench/model_scripts/uq_models/mapie.template +17 -8
  22. workbench/model_scripts/xgb_model/generated_model_script.py +38 -9
  23. workbench/model_scripts/xgb_model/xgb_model.template +34 -5
  24. workbench/resources/open_source_api.key +1 -1
  25. workbench/utils/chemprop_utils.py +38 -1
  26. workbench/utils/pytorch_utils.py +38 -8
  27. workbench/web_interface/components/model_plot.py +7 -1
  28. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/METADATA +2 -2
  29. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/RECORD +33 -33
  30. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  31. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
  32. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/WHEEL +0 -0
  33. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/entry_points.txt +0 -0
  34. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/licenses/LICENSE +0 -0
  35. {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/top_level.txt +0 -0
@@ -102,10 +102,21 @@ class ModelToEndpoint(Transform):
102
102
  # Is this a serverless deployment?
103
103
  serverless_config = None
104
104
  if self.serverless:
105
+ # For PyTorch or ChemProp we need at least 4GB of memory
106
+ from workbench.api import ModelFramework
107
+
108
+ self.log.info(f"Model Framework: {workbench_model.model_framework}")
109
+ if workbench_model.model_framework in [ModelFramework.PYTORCH_TABULAR, ModelFramework.CHEMPROP]:
110
+ if mem_size < 4096:
111
+ self.log.important(
112
+ f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)"
113
+ )
114
+ mem_size = 4096
105
115
  serverless_config = ServerlessInferenceConfig(
106
116
  memory_size_in_mb=mem_size,
107
117
  max_concurrency=max_concurrency,
108
118
  )
119
+ self.log.important(f"Serverless Config: Memory={mem_size}MB, MaxConcurrency={max_concurrency}")
109
120
 
110
121
  # Configure data capture if requested (and not serverless)
111
122
  data_capture_config = None
@@ -68,6 +68,15 @@ class PandasToFeatures(Transform):
68
68
  self.output_df = input_df.copy()
69
69
  self.one_hot_columns = one_hot_columns or []
70
70
 
71
+ # Warn about known AWS Iceberg bug with event_time_column
72
+ if event_time_column is not None:
73
+ self.log.warning(
74
+ f"event_time_column='{event_time_column}' specified. Note: AWS has a known bug with "
75
+ "Iceberg FeatureGroups where varying event times across multiple days can cause "
76
+ "duplicate rows in the offline store. Setting event_time_column=None."
77
+ )
78
+ self.event_time_column = None
79
+
71
80
  # Now Prepare the DataFrame for its journey into an AWS FeatureGroup
72
81
  self.prep_dataframe()
73
82
 
@@ -400,7 +409,7 @@ class PandasToFeatures(Transform):
400
409
 
401
410
  # Set Hold Out Ids (if we got them during creation)
402
411
  if self.incoming_hold_out_ids:
403
- self.output_feature_set.set_training_holdouts(self.id_column, self.incoming_hold_out_ids)
412
+ self.output_feature_set.set_training_holdouts(self.incoming_hold_out_ids)
404
413
 
405
414
  def ensure_feature_group_created(self, feature_group):
406
415
  status = feature_group.describe().get("FeatureGroupStatus")
@@ -462,7 +471,7 @@ if __name__ == "__main__":
462
471
 
463
472
  # Create my DF to Feature Set Transform (with one-hot encoding)
464
473
  df_to_features = PandasToFeatures("test_features")
465
- df_to_features.set_input(data_df, id_column="id", one_hot_columns=["food"])
474
+ df_to_features.set_input(data_df, id_column="id", event_time_column="date", one_hot_columns=["food"])
466
475
  df_to_features.set_output_tags(["test", "small"])
467
476
  df_to_features.transform()
468
477