workbench 0.8.201__py3-none-any.whl → 0.8.203__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. workbench/api/df_store.py +17 -108
  2. workbench/api/feature_set.py +41 -7
  3. workbench/api/parameter_store.py +3 -52
  4. workbench/core/artifacts/artifact.py +5 -5
  5. workbench/core/artifacts/df_store_core.py +114 -0
  6. workbench/core/artifacts/endpoint_core.py +203 -58
  7. workbench/core/artifacts/model_core.py +11 -7
  8. workbench/core/artifacts/parameter_store_core.py +98 -0
  9. workbench/core/transforms/features_to_model/features_to_model.py +27 -13
  10. workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
  11. workbench/model_scripts/chemprop/chemprop.template +297 -295
  12. workbench/model_scripts/chemprop/generated_model_script.py +300 -298
  13. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
  14. workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
  15. workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
  16. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
  17. workbench/model_scripts/pytorch_model/generated_model_script.py +278 -128
  18. workbench/model_scripts/pytorch_model/pytorch.template +273 -123
  19. workbench/model_scripts/uq_models/generated_model_script.py +19 -10
  20. workbench/model_scripts/uq_models/mapie.template +17 -8
  21. workbench/model_scripts/xgb_model/generated_model_script.py +38 -9
  22. workbench/model_scripts/xgb_model/xgb_model.template +34 -5
  23. workbench/resources/open_source_api.key +1 -1
  24. workbench/utils/chemprop_utils.py +38 -1
  25. workbench/utils/pytorch_utils.py +38 -8
  26. workbench/web_interface/components/model_plot.py +7 -1
  27. {workbench-0.8.201.dist-info → workbench-0.8.203.dist-info}/METADATA +2 -2
  28. {workbench-0.8.201.dist-info → workbench-0.8.203.dist-info}/RECORD +32 -32
  29. workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
  30. workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
  31. {workbench-0.8.201.dist-info → workbench-0.8.203.dist-info}/WHEEL +0 -0
  32. {workbench-0.8.201.dist-info → workbench-0.8.203.dist-info}/entry_points.txt +0 -0
  33. {workbench-0.8.201.dist-info → workbench-0.8.203.dist-info}/licenses/LICENSE +0 -0
  34. {workbench-0.8.201.dist-info → workbench-0.8.203.dist-info}/top_level.txt +0 -0
@@ -68,6 +68,15 @@ class PandasToFeatures(Transform):
68
68
  self.output_df = input_df.copy()
69
69
  self.one_hot_columns = one_hot_columns or []
70
70
 
71
+ # Warn about known AWS Iceberg bug with event_time_column
72
+ if event_time_column is not None:
73
+ self.log.warning(
74
+ f"event_time_column='{event_time_column}' specified. Note: AWS has a known bug with "
75
+ "Iceberg FeatureGroups where varying event times across multiple days can cause "
76
+ "duplicate rows in the offline store. Setting event_time_column=None."
77
+ )
78
+ self.event_time_column = None
79
+
71
80
  # Now Prepare the DataFrame for its journey into an AWS FeatureGroup
72
81
  self.prep_dataframe()
73
82
 
@@ -400,7 +409,7 @@ class PandasToFeatures(Transform):
400
409
 
401
410
  # Set Hold Out Ids (if we got them during creation)
402
411
  if self.incoming_hold_out_ids:
403
- self.output_feature_set.set_training_holdouts(self.id_column, self.incoming_hold_out_ids)
412
+ self.output_feature_set.set_training_holdouts(self.incoming_hold_out_ids)
404
413
 
405
414
  def ensure_feature_group_created(self, feature_group):
406
415
  status = feature_group.describe().get("FeatureGroupStatus")
@@ -462,7 +471,7 @@ if __name__ == "__main__":
462
471
 
463
472
  # Create my DF to Feature Set Transform (with one-hot encoding)
464
473
  df_to_features = PandasToFeatures("test_features")
465
- df_to_features.set_input(data_df, id_column="id", one_hot_columns=["food"])
474
+ df_to_features.set_input(data_df, id_column="id", event_time_column="date", one_hot_columns=["food"])
466
475
  df_to_features.set_output_tags(["test", "small"])
467
476
  df_to_features.transform()
468
477