workbench-0.8.239-py3-none-any.whl → workbench-0.8.243-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/core/artifacts/endpoint_core.py +1 -1
- workbench/core/transforms/features_to_model/features_to_model.py +2 -2
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +1 -2
- workbench/model_script_utils/pytorch_utils.py +2 -2
- workbench/model_scripts/chemprop/chemprop.template +8 -5
- workbench/model_scripts/chemprop/generated_model_script.py +10 -7
- workbench/model_scripts/pytorch_model/pytorch.template +1 -1
- workbench/model_scripts/pytorch_model/pytorch_utils.py +2 -2
- {workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/METADATA +1 -1
- {workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/RECORD +14 -14
- {workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/WHEEL +0 -0
- {workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/entry_points.txt +0 -0
- {workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/top_level.txt +0 -0
workbench/core/artifacts/endpoint_core.py
@@ -1138,7 +1138,7 @@ class EndpointCore(Artifact):
             cls.log.error("Error deleting endpoint.")
             raise e

-        time.sleep(
+        time.sleep(10)  # Final sleep for AWS to fully register deletions

     @classmethod
     def delete_endpoint_models(cls, endpoint_name: str):
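The new fixed 10-second sleep compensates for AWS eventual consistency after deleting endpoint resources. Where a single endpoint is involved, boto3's built-in waiter is a stricter alternative; a minimal sketch, assuming a boto3 SageMaker client (the endpoint name is hypothetical):

```python
import boto3

sm = boto3.client("sagemaker")
sm.delete_endpoint(EndpointName="my-endpoint")  # hypothetical endpoint name

# Poll until AWS reports the endpoint fully deleted (up to 12 x 5s = 60s)
waiter = sm.get_waiter("endpoint_deleted")
waiter.wait(EndpointName="my-endpoint", WaiterConfig={"Delay": 5, "MaxAttempts": 12})
```

A fixed sleep is the simpler choice when several related resources (models, endpoint configs) are deleted in one pass, which matches the comment in the diff.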
workbench/core/transforms/features_to_model/features_to_model.py
@@ -247,9 +247,9 @@ class FeaturesToModel(Transform):
         # Create a Sagemaker Model with our script
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)

-        # Use GPU instance for ChemProp/PyTorch
+        # Use GPU instance for ChemProp/PyTorch
         if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH]:
-            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$
+            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$1.00/hr
             self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
         else:
             train_instance_type = "ml.m5.xlarge"
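A standalone sketch of the selection logic above; the CHEMPROP and PYTORCH members mirror the diff, while the CPU-only member and enum values are illustrative assumptions:

```python
from enum import Enum

class ModelFramework(Enum):
    CHEMPROP = "chemprop"
    PYTORCH = "pytorch"
    XGBOOST = "xgboost"  # hypothetical CPU-only framework, for illustration

def pick_training_instance(framework: ModelFramework) -> str:
    # Deep-learning frameworks get an NVIDIA L4 GPU instance; the rest train on CPU
    if framework in (ModelFramework.CHEMPROP, ModelFramework.PYTORCH):
        return "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$1.00/hr
    return "ml.m5.xlarge"

print(pick_training_instance(ModelFramework.PYTORCH))  # ml.g6.xlarge
```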
workbench/core/transforms/model_to_endpoint/model_to_endpoint.py
@@ -108,7 +108,6 @@ class ModelToEndpoint(Transform):

         # Is this a serverless deployment?
         serverless_config = None
-        instance_type = None
         if self.serverless:
             # For PyTorch or ChemProp we need at least 4GB of memory
             if needs_more_resources and mem_size < 4096:

@@ -126,7 +125,7 @@ class ModelToEndpoint(Transform):
             instance_type = self.instance
             self.log.important(f"Realtime Endpoint: Using specified instance type: {instance_type}")
         elif needs_more_resources:
-            instance_type = "ml.c7i.
+            instance_type = "ml.c7i.large"
             self.log.important(f"{workbench_model.model_framework} needs more resources (using {instance_type})")
         else:
             instance_type = "ml.t2.medium"
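The surrounding code chooses between a serverless config (with a 4GB memory floor for PyTorch/ChemProp) and a realtime instance. A minimal sketch of the serverless path with the SageMaker Python SDK; the `max_concurrency` value and the commented deploy call are illustrative assumptions, not Workbench's exact code:

```python
from sagemaker.serverless import ServerlessInferenceConfig

mem_size = 2048
needs_more_resources = True  # e.g., a PyTorch or ChemProp model
if needs_more_resources and mem_size < 4096:
    mem_size = 4096  # enforce the 4GB floor from the diff

serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=mem_size,
    max_concurrency=5,  # illustrative
)
# predictor = model.deploy(serverless_inference_config=serverless_config)
```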
workbench/model_script_utils/pytorch_utils.py
@@ -266,8 +266,8 @@ def train_model(
     train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
     val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)

-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

     # Loss and optimizer
     if task == "classification":
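`pin_memory=True` stages batches in page-locked host memory, which is what allows host-to-GPU copies to run asynchronously. The benefit shows up when transfers use `non_blocking=True`; a minimal generic sketch (plain PyTorch, not Workbench's training loop):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(256, 16), torch.randn(256, 1))
loader = DataLoader(dataset, batch_size=64, shuffle=True, pin_memory=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
for x, y in loader:
    # With pinned source memory, these copies can overlap with GPU compute
    x = x.to(device, non_blocking=True)
    y = y.to(device, non_blocking=True)
```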
@@ -34,7 +34,7 @@ DEFAULT_HYPERPARAMETERS = {
|
|
|
34
34
|
"n_folds": 5,
|
|
35
35
|
"max_epochs": 400,
|
|
36
36
|
"patience": 50,
|
|
37
|
-
"batch_size":
|
|
37
|
+
"batch_size": 64,
|
|
38
38
|
# Message Passing (ignored when using foundation model)
|
|
39
39
|
"hidden_dim": 700,
|
|
40
40
|
"depth": 6,
|
|
@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
|
|
|
220
220
|
return df
|
|
221
221
|
|
|
222
222
|
dataset = data.MoleculeDataset(datapoints)
|
|
223
|
-
|
|
223
|
+
# Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
|
|
224
|
+
# dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
|
|
225
|
+
dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))
|
|
224
226
|
|
|
225
227
|
# Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
|
|
226
228
|
all_preds = []
|
|
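The single-sample issue the comment describes is easy to reproduce with a plain PyTorch DataLoader; this sketch shows why `batch_size=len(dataset)` sidesteps it (generic PyTorch, not ChemProp's `build_dataloader`):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(65, 3))  # 65 % 64 == 1: the problem case

# With drop_last=True the final one-sample batch is silently discarded
lossy = DataLoader(dataset, batch_size=64, drop_last=True)
print(sum(b[0].shape[0] for b in lossy))  # 64 -- one prediction would go missing

# batch_size=len(dataset) yields exactly one full batch, so every row is scored
safe = DataLoader(dataset, batch_size=len(dataset))
print(sum(b[0].shape[0] for b in safe))  # 65
```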
@@ -588,8 +590,9 @@ if __name__ == "__main__":
|
|
|
588
590
|
val_dataset.normalize_targets(target_scaler)
|
|
589
591
|
output_transform = nn.UnscaleTransform.from_standard_scaler(target_scaler)
|
|
590
592
|
|
|
591
|
-
|
|
592
|
-
|
|
593
|
+
num_workers = min(os.cpu_count() or 4, 8) # Scale with CPUs, cap at 8
|
|
594
|
+
train_loader = data.build_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)
|
|
595
|
+
val_loader = data.build_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)
|
|
593
596
|
|
|
594
597
|
# Build model
|
|
595
598
|
pl.seed_everything(hyperparameters["seed"] + fold_idx)
|
|
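For reference, the same worker-related knobs shown on a plain `torch.utils.data.DataLoader` (the diff passes them through ChemProp's `build_dataloader`; the dataset here is a stand-in). Note that `persistent_workers` and `prefetch_factor` are only valid with `num_workers > 0`, which the `min(os.cpu_count() or 4, 8)` heuristic guarantees:

```python
import os
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(1024, 8), torch.randn(1024, 1))  # stand-in data

num_workers = min(os.cpu_count() or 4, 8)  # scale with CPUs, cap at 8
loader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=True,
    num_workers=num_workers,             # parallel batch assembly in worker processes
    persistent_workers=num_workers > 0,  # keep workers alive across epochs
    pin_memory=True,                     # page-locked buffers for async GPU copies
    prefetch_factor=2,                   # batches each worker keeps ready in advance
)
```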
@@ -615,7 +618,7 @@ if __name__ == "__main__":
|
|
|
615
618
|
callbacks.append(pl.callbacks.ModelCheckpoint(
|
|
616
619
|
dirpath=args.model_dir, filename=f"best_{fold_idx}", monitor="val_loss", mode="min", save_top_k=1
|
|
617
620
|
))
|
|
618
|
-
return pl.Trainer(accelerator="auto", max_epochs=max_epochs, logger=False, enable_progress_bar=True, callbacks=callbacks)
|
|
621
|
+
return pl.Trainer(accelerator="auto", max_epochs=max_epochs, precision="16-mixed", logger=False, enable_progress_bar=True, callbacks=callbacks)
|
|
619
622
|
|
|
620
623
|
if use_two_phase:
|
|
621
624
|
# Phase 1: Freeze MPNN, train FFN only
|
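`precision="16-mixed"` has Lightning run autocast mixed precision with gradient scaling, which roughly halves activation memory and speeds up training on GPUs with fp16 tensor cores. A minimal sketch of the equivalent hand-rolled pattern in plain PyTorch (the model, optimizer, and tensors are hypothetical stand-ins; requires a CUDA device):

```python
import torch

# Hypothetical stand-ins; Lightning manages these internally
model = torch.nn.Linear(8, 1).cuda()
optimizer = torch.optim.Adam(model.parameters())
x = torch.randn(64, 8, device="cuda")
y = torch.randn(64, 1, device="cuda")

scaler = torch.cuda.amp.GradScaler()
optimizer.zero_grad()
with torch.autocast(device_type="cuda", dtype=torch.float16):
    loss = torch.nn.functional.mse_loss(model(x), y)  # fp16 forward pass
scaler.scale(loss).backward()  # scale the loss to avoid fp16 gradient underflow
scaler.step(optimizer)         # unscale gradients, then apply the optimizer step
scaler.update()
```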
workbench/model_scripts/chemprop/generated_model_script.py
@@ -45,10 +45,10 @@ DEFAULT_HYPERPARAMETERS = {
     # Loss function for regression (mae, mse)
     "criterion": "mae",
     # Split strategy: "random", "scaffold", or "butina"
-    # - random: Standard random split
+    # - random: Standard random split (default)
     # - scaffold: Bemis-Murcko scaffold-based grouping
     # - butina: Morgan fingerprint clustering (recommended for ADMET)
-    "split_strategy": "
+    "split_strategy": "random",
     "butina_cutoff": 0.4,  # Tanimoto distance cutoff for Butina clustering
     # Random seed
     "seed": 42,
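Of the three strategies, scaffold and butina splits both aim to keep structurally similar molecules on the same side of the split, giving a more honest estimate of generalization to new chemistry. A minimal sketch of the scaffold grouping idea with RDKit (not Workbench's implementation; the SMILES are illustrative):

```python
from collections import defaultdict
from rdkit.Chem.Scaffolds import MurckoScaffold

smiles_list = ["c1ccccc1CC(=O)O", "c1ccccc1CCN", "CCCCO"]  # illustrative molecules

groups = defaultdict(list)
for idx, smi in enumerate(smiles_list):
    # Bemis-Murcko scaffold: ring systems plus linkers, side chains stripped
    scaffold = MurckoScaffold.MurckoScaffoldSmiles(smiles=smi)
    groups[scaffold].append(idx)

# Assign whole scaffold groups to train or validation, never splitting a group
print(dict(groups))  # {'c1ccccc1': [0, 1], '': [2]}
```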
workbench/model_scripts/chemprop/generated_model_script.py
@@ -65,11 +65,11 @@ DEFAULT_HYPERPARAMETERS = {
 # Template parameters (filled in by Workbench)
 TEMPLATE_PARAMS = {
     "model_type": "uq_regressor",
-    "targets": ['
+    "targets": ['udm_asy_res_extraction_percent'],
     "feature_list": ['smiles'],
-    "id_column": "
-    "model_metrics_s3_path": "s3://
-    "hyperparameters": {
+    "id_column": "udm_mol_bat_id",
+    "model_metrics_s3_path": "s3://idb-prod-sageworks-artifacts/models/hlm-extraction-reg-chemprop-1-260128/training",
+    "hyperparameters": {},
 }


workbench/model_scripts/chemprop/generated_model_script.py
@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
         return df

     dataset = data.MoleculeDataset(datapoints)
-
+    # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+    # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+    dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))

     # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
     all_preds = []
workbench/model_scripts/chemprop/generated_model_script.py
@@ -752,6 +754,7 @@ if __name__ == "__main__":
     output_columns += [f"{t}_pred" for t in target_columns] + [f"{t}_pred_std" for t in target_columns]
     output_columns += ["prediction", "prediction_std", "confidence"]
     output_columns += [c for c in df_val.columns if c.endswith("_proba")]
+
     output_columns = [c for c in output_columns if c in df_val.columns]

     wr.s3.to_csv(df_val[output_columns], f"{model_metrics_s3_path}/validation_predictions.csv", index=False)
workbench/model_scripts/pytorch_model/pytorch.template
@@ -45,7 +45,7 @@ DEFAULT_HYPERPARAMETERS = {
     "n_folds": 5,
     "max_epochs": 200,
     "early_stopping_patience": 30,
-    "batch_size":
+    "batch_size": 64,
     # Model architecture (larger capacity - ensemble provides regularization)
     "layers": "512-256-128",
     "learning_rate": 1e-3,
workbench/model_scripts/pytorch_model/pytorch_utils.py
@@ -266,8 +266,8 @@ def train_model(
     train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
     val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)

-    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
-    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

     # Loss and optimizer
     if task == "classification":
{workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: workbench
-Version: 0.8.239
+Version: 0.8.243
 Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
 Author-email: SuperCowPowers LLC <support@supercowpowers.com>
 License: MIT License
{workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/RECORD
@@ -60,7 +60,7 @@ workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcv
 workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
 workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
 workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
-workbench/core/artifacts/endpoint_core.py,sha256=
+workbench/core/artifacts/endpoint_core.py,sha256=hEjMXag9nsLGu_eOLxPSSqcu1aP_1Vo9DHIGemLutvc,55695
 workbench/core/artifacts/feature_set_core.py,sha256=IjSUpxpj2S611uo5LmnOK-aH3CZhfbC5ztC02PQ5gqE,42128
 workbench/core/artifacts/model_core.py,sha256=wPkpdRlxnAXMqsDtJGPotGFO146Hm7NCfYbImHwZo9c,52343
 workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
{workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/RECORD
@@ -107,9 +107,9 @@ workbench/core/transforms/features_to_features/__init__.py,sha256=47DEQpj8HBSa-_
 workbench/core/transforms/features_to_features/heavy/emr/Readme.md,sha256=YtQgCEQeKe0CQXQkhzMTYq9xOtCsCYb5P5LW2BmRKWQ,68
 workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCatWfoDr99zUwvOcxf-TqMkQzaMqXlj5nmFcRzfo,48
 workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/features_to_model/features_to_model.py,sha256=
+workbench/core/transforms/features_to_model/features_to_model.py,sha256=pYGdfnp-6xh79kxQ5iXySi7oYcaRuQ-xLDo1rFgDB7g,20876
 workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=
+workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=I44_ziQ0IegudLQ_qJ-XNfWZInDkXWI9LsE-1o9855w,8823
 workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894
 workbench/core/transforms/pandas_transforms/data_to_pandas.py,sha256=sJHPeuNF8Q8aQqgRnkdWkyvur5cbggdUVIwR-xF3Dlo,3621
 workbench/core/transforms/pandas_transforms/features_to_pandas.py,sha256=af6xdPt2V4zhh-SzQa_UYxdmNMzMLXbrbsznV5QoIJg,3441
{workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/RECORD
@@ -128,11 +128,11 @@ workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,
 workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
 workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
 workbench/model_script_utils/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
-workbench/model_script_utils/pytorch_utils.py,sha256=
+workbench/model_script_utils/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
 workbench/model_script_utils/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
 workbench/model_scripts/script_generation.py,sha256=Sv0OJdASNKk1KXr8goiZWUL5W7i8G8gBb_R_OTb8caI,8257
-workbench/model_scripts/chemprop/chemprop.template,sha256=
-workbench/model_scripts/chemprop/generated_model_script.py,sha256=
+workbench/model_scripts/chemprop/chemprop.template,sha256=otuR2Ee-GogsNo4z1MlefXY9G--ZOTgg4rFc_5NXivw,36941
+workbench/model_scripts/chemprop/generated_model_script.py,sha256=6duTkJUH1eRrsGHAZN1DWRKR74K5tsXKcQPrWd3vjxQ,36724
 workbench/model_scripts/chemprop/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
 workbench/model_scripts/chemprop/requirements.txt,sha256=2IBHZZNYqhX9Ed7AmRVgN06tO3EHeBbN2EM8-tjWZhs,216
 workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
{workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/RECORD
@@ -162,8 +162,8 @@ workbench/model_scripts/meta_model/generated_model_script.py,sha256=ncPrHd9-R8l_
 workbench/model_scripts/meta_model/meta_model.template,sha256=viz-AKVq3YRwOUBt8-rUO1TwdEPFzyP7nnifqcIJurw,8244
 workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=1B4RortOxbB7feTrr5Kf9qUqdqG4Qc1a6evdNUYLSNg,27011
 workbench/model_scripts/pytorch_model/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
-workbench/model_scripts/pytorch_model/pytorch.template,sha256=
-workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=
+workbench/model_scripts/pytorch_model/pytorch.template,sha256=78TBsT1NoPkVL-cINZMjA1SE91abUgKtNqedOCvS7lU,22766
+workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
 workbench/model_scripts/pytorch_model/requirements.txt,sha256=ES7YehHEL4E5oV8FScHm3oNQmkMI4ODgbC1fSbaY7T4,183
 workbench/model_scripts/pytorch_model/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
 workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgPRCH9iwI3wI0N0V6p9AgqW0mVOMuSXzUCk,17187
{workbench-0.8.239.dist-info → workbench-0.8.243.dist-info}/RECORD
@@ -305,9 +305,9 @@ workbench/web_interface/page_views/main_page.py,sha256=DyChwOGX_KtbJ09pw2Iswofba
 workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
 workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
 workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
-workbench-0.8.
+workbench-0.8.243.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
+workbench-0.8.243.dist-info/METADATA,sha256=mWeiFAV-J1TZKygpTEymX4gPhl2dFfscZKPOFitOMFo,10038
+workbench-0.8.243.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+workbench-0.8.243.dist-info/entry_points.txt,sha256=Stivs_FFse2pHLXfWNpyh649z0bj7Ks5laQy8LuexCA,633
+workbench-0.8.243.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+workbench-0.8.243.dist-info/RECORD,,
The remaining dist-info files (WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt) are renamed for the new version but otherwise unchanged.