workbench 0.8.201__py3-none-any.whl → 0.8.204__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/api/df_store.py +17 -108
- workbench/api/feature_set.py +41 -7
- workbench/api/parameter_store.py +3 -52
- workbench/core/artifacts/artifact.py +5 -5
- workbench/core/artifacts/df_store_core.py +114 -0
- workbench/core/artifacts/endpoint_core.py +184 -75
- workbench/core/artifacts/model_core.py +11 -7
- workbench/core/artifacts/parameter_store_core.py +98 -0
- workbench/core/transforms/features_to_model/features_to_model.py +27 -13
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +11 -0
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +11 -2
- workbench/model_scripts/chemprop/chemprop.template +312 -293
- workbench/model_scripts/chemprop/generated_model_script.py +316 -297
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +11 -5
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +11 -5
- workbench/model_scripts/custom_models/uq_models/ngboost.template +11 -5
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +11 -5
- workbench/model_scripts/pytorch_model/generated_model_script.py +278 -128
- workbench/model_scripts/pytorch_model/pytorch.template +273 -123
- workbench/model_scripts/uq_models/generated_model_script.py +20 -11
- workbench/model_scripts/uq_models/mapie.template +17 -8
- workbench/model_scripts/xgb_model/generated_model_script.py +38 -9
- workbench/model_scripts/xgb_model/xgb_model.template +34 -5
- workbench/resources/open_source_api.key +1 -1
- workbench/utils/chemprop_utils.py +38 -1
- workbench/utils/pytorch_utils.py +38 -8
- workbench/web_interface/components/model_plot.py +7 -1
- {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/METADATA +2 -2
- {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/RECORD +33 -33
- workbench/core/cloud_platform/aws/aws_df_store.py +0 -404
- workbench/core/cloud_platform/aws/aws_parameter_store.py +0 -296
- {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/WHEEL +0 -0
- {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.201.dist-info → workbench-0.8.204.dist-info}/top_level.txt +0 -0
|
@@ -102,10 +102,21 @@ class ModelToEndpoint(Transform):
|
|
|
102
102
|
# Is this a serverless deployment?
|
|
103
103
|
serverless_config = None
|
|
104
104
|
if self.serverless:
|
|
105
|
+
# For PyTorch or ChemProp we need at least 4GB of memory
|
|
106
|
+
from workbench.api import ModelFramework
|
|
107
|
+
|
|
108
|
+
self.log.info(f"Model Framework: {workbench_model.model_framework}")
|
|
109
|
+
if workbench_model.model_framework in [ModelFramework.PYTORCH_TABULAR, ModelFramework.CHEMPROP]:
|
|
110
|
+
if mem_size < 4096:
|
|
111
|
+
self.log.important(
|
|
112
|
+
f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)"
|
|
113
|
+
)
|
|
114
|
+
mem_size = 4096
|
|
105
115
|
serverless_config = ServerlessInferenceConfig(
|
|
106
116
|
memory_size_in_mb=mem_size,
|
|
107
117
|
max_concurrency=max_concurrency,
|
|
108
118
|
)
|
|
119
|
+
self.log.important(f"Serverless Config: Memory={mem_size}MB, MaxConcurrency={max_concurrency}")
|
|
109
120
|
|
|
110
121
|
# Configure data capture if requested (and not serverless)
|
|
111
122
|
data_capture_config = None
|
|
@@ -68,6 +68,15 @@ class PandasToFeatures(Transform):
|
|
|
68
68
|
self.output_df = input_df.copy()
|
|
69
69
|
self.one_hot_columns = one_hot_columns or []
|
|
70
70
|
|
|
71
|
+
# Warn about known AWS Iceberg bug with event_time_column
|
|
72
|
+
if event_time_column is not None:
|
|
73
|
+
self.log.warning(
|
|
74
|
+
f"event_time_column='{event_time_column}' specified. Note: AWS has a known bug with "
|
|
75
|
+
"Iceberg FeatureGroups where varying event times across multiple days can cause "
|
|
76
|
+
"duplicate rows in the offline store. Setting event_time_column=None."
|
|
77
|
+
)
|
|
78
|
+
self.event_time_column = None
|
|
79
|
+
|
|
71
80
|
# Now Prepare the DataFrame for its journey into an AWS FeatureGroup
|
|
72
81
|
self.prep_dataframe()
|
|
73
82
|
|
|
@@ -400,7 +409,7 @@ class PandasToFeatures(Transform):
|
|
|
400
409
|
|
|
401
410
|
# Set Hold Out Ids (if we got them during creation)
|
|
402
411
|
if self.incoming_hold_out_ids:
|
|
403
|
-
self.output_feature_set.set_training_holdouts(self.
|
|
412
|
+
self.output_feature_set.set_training_holdouts(self.incoming_hold_out_ids)
|
|
404
413
|
|
|
405
414
|
def ensure_feature_group_created(self, feature_group):
|
|
406
415
|
status = feature_group.describe().get("FeatureGroupStatus")
|
|
@@ -462,7 +471,7 @@ if __name__ == "__main__":
|
|
|
462
471
|
|
|
463
472
|
# Create my DF to Feature Set Transform (with one-hot encoding)
|
|
464
473
|
df_to_features = PandasToFeatures("test_features")
|
|
465
|
-
df_to_features.set_input(data_df, id_column="id", one_hot_columns=["food"])
|
|
474
|
+
df_to_features.set_input(data_df, id_column="id", event_time_column="date", one_hot_columns=["food"])
|
|
466
475
|
df_to_features.set_output_tags(["test", "small"])
|
|
467
476
|
df_to_features.transform()
|
|
468
477
|
|