workbench-0.8.239-py3-none-any.whl → workbench-0.8.243-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1138,7 +1138,7 @@ class EndpointCore(Artifact):
  cls.log.error("Error deleting endpoint.")
  raise e
 
- time.sleep(5) # Final sleep for AWS to fully register deletions
+ time.sleep(10) # Final sleep for AWS to fully register deletions
 
  @classmethod
  def delete_endpoint_models(cls, endpoint_name: str):
@@ -247,9 +247,9 @@ class FeaturesToModel(Transform):
  # Create a Sagemaker Model with our script
  image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)
 
- # Use GPU instance for ChemProp/PyTorch, CPU for others
+ # Use GPU instance for ChemProp/PyTorch
  if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH]:
- train_instance_type = "ml.g6.xlarge" # NVIDIA L4 GPU, ~$0.80/hr
+ train_instance_type = "ml.g6.xlarge" # NVIDIA L4 GPU, ~$1.00/hr
  self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
  else:
  train_instance_type = "ml.m5.xlarge"
@@ -108,7 +108,6 @@ class ModelToEndpoint(Transform):
 
  # Is this a serverless deployment?
  serverless_config = None
- instance_type = None
  if self.serverless:
  # For PyTorch or ChemProp we need at least 4GB of memory
  if needs_more_resources and mem_size < 4096:
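
For context on the serverless branch above, which builds a `serverless_config` with at least 4GB for PyTorch/ChemProp: a minimal sketch of such a config with the SageMaker Python SDK (memory and concurrency values are illustrative, not Workbench's exact settings):

```python
# Sketch: a SageMaker serverless endpoint config with >= 4GB of memory,
# matching the framework check above. Values are illustrative only.
from sagemaker.serverless import ServerlessInferenceConfig

serverless_config = ServerlessInferenceConfig(
    memory_size_in_mb=4096,  # the minimum the PyTorch/ChemProp check enforces
    max_concurrency=5,       # concurrent invocations before throttling
)
```
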
@@ -126,7 +125,7 @@ class ModelToEndpoint(Transform):
  instance_type = self.instance
  self.log.important(f"Realtime Endpoint: Using specified instance type: {instance_type}")
  elif needs_more_resources:
- instance_type = "ml.c7i.xlarge"
+ instance_type = "ml.c7i.large"
  self.log.important(f"{workbench_model.model_framework} needs more resources (using {instance_type})")
  else:
  instance_type = "ml.t2.medium"
@@ -266,8 +266,8 @@ def train_model(
  train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
  val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)
 
- train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
- val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+ train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+ val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
 
  # Loss and optimizer
  if task == "classification":
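
On the `pin_memory=True` change: pinned (page-locked) host memory lets host-to-GPU copies run asynchronously instead of staging through pageable memory. A minimal sketch with toy tensors (not Workbench code) showing how a pinned loader pairs with `non_blocking=True` transfers:

```python
# Toy example: pinned batches enable asynchronous host-to-device copies.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(1024, 16), torch.randn(1024, 1))
loader = DataLoader(dataset, batch_size=64, shuffle=True, pin_memory=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
for x, y in loader:
    # With pin_memory=True, non_blocking=True lets the copy overlap compute
    x = x.to(device, non_blocking=True)
    y = y.to(device, non_blocking=True)
    # ... forward/backward pass would go here
```
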
@@ -34,7 +34,7 @@ DEFAULT_HYPERPARAMETERS = {
  "n_folds": 5,
  "max_epochs": 400,
  "patience": 50,
- "batch_size": 32,
+ "batch_size": 64,
  # Message Passing (ignored when using foundation model)
  "hidden_dim": 700,
  "depth": 6,
@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
  return df
 
  dataset = data.MoleculeDataset(datapoints)
- dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=64, drop_last=False)
+ # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+ # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+ dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))
 
  # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
  all_preds = []
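
The remainder condition in the new comment is easy to verify with plain arithmetic; a small illustration (not ChemProp internals):

```python
# The problematic case: a trailing batch containing exactly one sample.
def last_batch_has_one_sample(n_samples: int, batch_size: int) -> bool:
    return n_samples % batch_size == 1

assert last_batch_has_one_sample(129, 64)       # 129 = 2*64 + 1 -> one-sample tail dropped
assert not last_batch_has_one_sample(128, 64)   # even split, no tail
assert not last_batch_has_one_sample(129, 129)  # batch_size=len(dataset) -> remainder 0
```
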
@@ -588,8 +590,9 @@ if __name__ == "__main__":
  val_dataset.normalize_targets(target_scaler)
  output_transform = nn.UnscaleTransform.from_standard_scaler(target_scaler)
 
- train_loader = data.build_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=3)
- val_loader = data.build_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=3)
+ num_workers = min(os.cpu_count() or 4, 8) # Scale with CPUs, cap at 8
+ train_loader = data.build_dataloader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)
+ val_loader = data.build_dataloader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, persistent_workers=num_workers > 0, pin_memory=True, prefetch_factor=2)
 
  # Build model
  pl.seed_everything(hyperparameters["seed"] + fold_idx)
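
The new loader kwargs are standard PyTorch DataLoader options, presumably forwarded by ChemProp's `build_dataloader`: `persistent_workers` keeps worker processes alive between epochs, and `prefetch_factor` controls how many batches each worker queues ahead. A standalone sketch of the same pattern with a plain DataLoader (toy dataset, not Workbench code):

```python
import os

import torch
from torch.utils.data import DataLoader, TensorDataset

num_workers = min(os.cpu_count() or 4, 8)  # scale with CPUs, cap at 8
loader = DataLoader(
    TensorDataset(torch.randn(256, 8), torch.randn(256, 1)),
    batch_size=64,
    shuffle=True,
    num_workers=num_workers,
    persistent_workers=num_workers > 0,  # avoid re-forking workers each epoch
    pin_memory=True,                     # page-locked memory for async GPU copies
    prefetch_factor=2 if num_workers > 0 else None,  # batches queued per worker
)
```
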
@@ -615,7 +618,7 @@ if __name__ == "__main__":
  callbacks.append(pl.callbacks.ModelCheckpoint(
  dirpath=args.model_dir, filename=f"best_{fold_idx}", monitor="val_loss", mode="min", save_top_k=1
  ))
- return pl.Trainer(accelerator="auto", max_epochs=max_epochs, logger=False, enable_progress_bar=True, callbacks=callbacks)
+ return pl.Trainer(accelerator="auto", max_epochs=max_epochs, precision="16-mixed", logger=False, enable_progress_bar=True, callbacks=callbacks)
 
  if use_two_phase:
  # Phase 1: Freeze MPNN, train FFN only
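
On the `precision="16-mixed"` flag: in Lightning 2.x this runs forward/backward in float16 while keeping float32 master weights with automatic loss scaling, which typically speeds up GPU training at little accuracy cost. A minimal sketch, assuming the Lightning 2.x import path (illustrative values):

```python
# Minimal Trainer with mixed precision enabled.
import lightning.pytorch as pl

trainer = pl.Trainer(
    accelerator="auto",    # pick GPU if available, else CPU
    max_epochs=1,
    precision="16-mixed",  # fp16 compute, fp32 master weights, loss scaling
    logger=False,
)
```
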
@@ -45,10 +45,10 @@ DEFAULT_HYPERPARAMETERS = {
  # Loss function for regression (mae, mse)
  "criterion": "mae",
  # Split strategy: "random", "scaffold", or "butina"
- # - random: Standard random split
+ # - random: Standard random split (default)
  # - scaffold: Bemis-Murcko scaffold-based grouping
  # - butina: Morgan fingerprint clustering (recommended for ADMET)
- "split_strategy": "butina",
+ "split_strategy": "random",
  "butina_cutoff": 0.4, # Tanimoto distance cutoff for Butina clustering
  # Random seed
  "seed": 42,
@@ -65,11 +65,11 @@ DEFAULT_HYPERPARAMETERS = {
  # Template parameters (filled in by Workbench)
  TEMPLATE_PARAMS = {
  "model_type": "uq_regressor",
- "targets": ['logd'],
+ "targets": ['udm_asy_res_extraction_percent'],
  "feature_list": ['smiles'],
- "id_column": "molecule_name",
- "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/logd-chemprop-split-butina/training",
- "hyperparameters": {'split_strategy': 'butina'},
+ "id_column": "udm_mol_bat_id",
+ "model_metrics_s3_path": "s3://idb-prod-sageworks-artifacts/models/hlm-extraction-reg-chemprop-1-260128/training",
+ "hyperparameters": {},
  }
 
 
@@ -220,7 +220,9 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
  return df
 
  dataset = data.MoleculeDataset(datapoints)
- dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=64)
+ # Note: Use dataset length as batch_size to prevent ChemProp's build_dataloader from
+ # dropping single-sample batches (its drop_last logic triggers when len(dataset) % batch_size == 1)
+ dataloader = data.build_dataloader(dataset, shuffle=False, batch_size=len(dataset))
 
  # Ensemble predictions using direct PyTorch inference (no Lightning Trainer)
  all_preds = []
@@ -752,6 +754,7 @@ if __name__ == "__main__":
  output_columns += [f"{t}_pred" for t in target_columns] + [f"{t}_pred_std" for t in target_columns]
  output_columns += ["prediction", "prediction_std", "confidence"]
  output_columns += [c for c in df_val.columns if c.endswith("_proba")]
+
  output_columns = [c for c in output_columns if c in df_val.columns]
 
  wr.s3.to_csv(df_val[output_columns], f"{model_metrics_s3_path}/validation_predictions.csv", index=False)
@@ -45,7 +45,7 @@ DEFAULT_HYPERPARAMETERS = {
  "n_folds": 5,
  "max_epochs": 200,
  "early_stopping_patience": 30,
- "batch_size": 128,
+ "batch_size": 64,
  # Model architecture (larger capacity - ensemble provides regularization)
  "layers": "512-256-128",
  "learning_rate": 1e-3,
@@ -266,8 +266,8 @@ def train_model(
  train_dataset = TensorDataset(train_x_cont, dummy_cat, train_y)
  val_dataset = TensorDataset(val_x_cont, dummy_val_cat, val_y)
 
- train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
- val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+ train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
+ val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)
 
  # Loss and optimizer
  if task == "classification":
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: workbench
- Version: 0.8.239
+ Version: 0.8.243
  Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
  Author-email: SuperCowPowers LLC <support@supercowpowers.com>
  License: MIT License
@@ -60,7 +60,7 @@ workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcv
  workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
  workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
  workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
- workbench/core/artifacts/endpoint_core.py,sha256=e9Fs07D2SXvLabaywTndX8R1iyO-WjHoNW4A80UUiSs,55694
+ workbench/core/artifacts/endpoint_core.py,sha256=hEjMXag9nsLGu_eOLxPSSqcu1aP_1Vo9DHIGemLutvc,55695
  workbench/core/artifacts/feature_set_core.py,sha256=IjSUpxpj2S611uo5LmnOK-aH3CZhfbC5ztC02PQ5gqE,42128
  workbench/core/artifacts/model_core.py,sha256=wPkpdRlxnAXMqsDtJGPotGFO146Hm7NCfYbImHwZo9c,52343
  workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
@@ -107,9 +107,9 @@ workbench/core/transforms/features_to_features/__init__.py,sha256=47DEQpj8HBSa-_
  workbench/core/transforms/features_to_features/heavy/emr/Readme.md,sha256=YtQgCEQeKe0CQXQkhzMTYq9xOtCsCYb5P5LW2BmRKWQ,68
  workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCatWfoDr99zUwvOcxf-TqMkQzaMqXlj5nmFcRzfo,48
  workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- workbench/core/transforms/features_to_model/features_to_model.py,sha256=stTOKAh_OJaI4ao6G8GRECa78sViaJXBzwt9myK5joM,20892
+ workbench/core/transforms/features_to_model/features_to_model.py,sha256=pYGdfnp-6xh79kxQ5iXySi7oYcaRuQ-xLDo1rFgDB7g,20876
  workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=PGDrpWvQMhMTgLGZQ6bw2blsmWgMAXyIIL0NKt3E09A,8853
+ workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=I44_ziQ0IegudLQ_qJ-XNfWZInDkXWI9LsE-1o9855w,8823
  workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894
  workbench/core/transforms/pandas_transforms/data_to_pandas.py,sha256=sJHPeuNF8Q8aQqgRnkdWkyvur5cbggdUVIwR-xF3Dlo,3621
  workbench/core/transforms/pandas_transforms/features_to_pandas.py,sha256=af6xdPt2V4zhh-SzQa_UYxdmNMzMLXbrbsznV5QoIJg,3441
@@ -128,11 +128,11 @@ workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,
  workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
  workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
  workbench/model_script_utils/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
- workbench/model_script_utils/pytorch_utils.py,sha256=vr8ybK45U0H8Jhjb5qx6xbJNozdcl7bVqubknDwh6U0,13704
+ workbench/model_script_utils/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
  workbench/model_script_utils/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
  workbench/model_scripts/script_generation.py,sha256=Sv0OJdASNKk1KXr8goiZWUL5W7i8G8gBb_R_OTb8caI,8257
- workbench/model_scripts/chemprop/chemprop.template,sha256=WtXYuC5NB2kY1nPRaS1VvaE72i6U2dIh6KZXIkT9t-o,36490
- workbench/model_scripts/chemprop/generated_model_script.py,sha256=awO8O1Arbpct8c3QoUjABWQ2ZbVus-ie8dNRLo1UiD4,36498
+ workbench/model_scripts/chemprop/chemprop.template,sha256=otuR2Ee-GogsNo4z1MlefXY9G--ZOTgg4rFc_5NXivw,36941
+ workbench/model_scripts/chemprop/generated_model_script.py,sha256=6duTkJUH1eRrsGHAZN1DWRKR74K5tsXKcQPrWd3vjxQ,36724
  workbench/model_scripts/chemprop/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
  workbench/model_scripts/chemprop/requirements.txt,sha256=2IBHZZNYqhX9Ed7AmRVgN06tO3EHeBbN2EM8-tjWZhs,216
  workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
@@ -162,8 +162,8 @@ workbench/model_scripts/meta_model/generated_model_script.py,sha256=ncPrHd9-R8l_
  workbench/model_scripts/meta_model/meta_model.template,sha256=viz-AKVq3YRwOUBt8-rUO1TwdEPFzyP7nnifqcIJurw,8244
  workbench/model_scripts/pytorch_model/generated_model_script.py,sha256=1B4RortOxbB7feTrr5Kf9qUqdqG4Qc1a6evdNUYLSNg,27011
  workbench/model_scripts/pytorch_model/model_script_utils.py,sha256=aM3ZaJxyMy7smokIF83fXUx3YSzLs8BNNMLfJDCoe8I,21231
- workbench/model_scripts/pytorch_model/pytorch.template,sha256=FZYI4D-u5lDkJSyvgJYVhtvt9PnfL_pEVGtBYv64sNU,22767
- workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=vr8ybK45U0H8Jhjb5qx6xbJNozdcl7bVqubknDwh6U0,13704
+ workbench/model_scripts/pytorch_model/pytorch.template,sha256=78TBsT1NoPkVL-cINZMjA1SE91abUgKtNqedOCvS7lU,22766
+ workbench/model_scripts/pytorch_model/pytorch_utils.py,sha256=kQCTRqdbszlurMrzyflyOo2amDJYx3Pni1rRhGHWXm4,13738
  workbench/model_scripts/pytorch_model/requirements.txt,sha256=ES7YehHEL4E5oV8FScHm3oNQmkMI4ODgbC1fSbaY7T4,183
  workbench/model_scripts/pytorch_model/uq_harness.py,sha256=Qv5UQdjn72Ssa3NWGGsnSB_wDp0au2TXVauFK81Ebr0,11498
  workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgPRCH9iwI3wI0N0V6p9AgqW0mVOMuSXzUCk,17187
@@ -305,9 +305,9 @@ workbench/web_interface/page_views/main_page.py,sha256=DyChwOGX_KtbJ09pw2Iswofba
  workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
  workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
  workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
- workbench-0.8.239.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
- workbench-0.8.239.dist-info/METADATA,sha256=fPofZrAWmIGGYOLeDe95A7yMaMplxZE-gK5Zzi22AQQ,10038
- workbench-0.8.239.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- workbench-0.8.239.dist-info/entry_points.txt,sha256=Stivs_FFse2pHLXfWNpyh649z0bj7Ks5laQy8LuexCA,633
- workbench-0.8.239.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
- workbench-0.8.239.dist-info/RECORD,,
+ workbench-0.8.243.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
+ workbench-0.8.243.dist-info/METADATA,sha256=mWeiFAV-J1TZKygpTEymX4gPhl2dFfscZKPOFitOMFo,10038
+ workbench-0.8.243.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ workbench-0.8.243.dist-info/entry_points.txt,sha256=Stivs_FFse2pHLXfWNpyh649z0bj7Ks5laQy8LuexCA,633
+ workbench-0.8.243.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
+ workbench-0.8.243.dist-info/RECORD,,