workbench 0.8.203__py3-none-any.whl → 0.8.204__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -795,30 +795,6 @@ class EndpointCore(Artifact):
795
795
  combined = row_hashes.values.tobytes()
796
796
  return hashlib.md5(combined).hexdigest()[:hash_length]
797
797
 
798
- @staticmethod
799
- def _find_prediction_column(df: pd.DataFrame, target_column: str) -> Optional[str]:
800
- """Find the prediction column in a DataFrame.
801
-
802
- Looks for 'prediction' column first, then '{target}_pred' pattern.
803
-
804
- Args:
805
- df: DataFrame to search
806
- target_column: Name of the target column (used for {target}_pred pattern)
807
-
808
- Returns:
809
- Name of the prediction column, or None if not found
810
- """
811
- # Check for 'prediction' column first (legacy/standard format)
812
- if "prediction" in df.columns:
813
- return "prediction"
814
-
815
- # Check for '{target}_pred' format (multi-target format)
816
- target_pred_col = f"{target_column}_pred"
817
- if target_pred_col in df.columns:
818
- return target_pred_col
819
-
820
- return None
821
-
822
798
  def _capture_inference_results(
823
799
  self,
824
800
  capture_name: str,
@@ -946,29 +922,23 @@ class EndpointCore(Artifact):
946
922
  self.log.warning("No predictions were made. Returning empty DataFrame.")
947
923
  return pd.DataFrame()
948
924
 
949
- # Find the prediction column: "prediction" or "{target}_pred"
950
- prediction_col = self._find_prediction_column(prediction_df, target_column)
951
- if prediction_col is None:
952
- self.log.warning(f"No prediction column found for target '{target_column}'")
925
+ # Check for prediction column
926
+ if "prediction" not in prediction_df.columns:
927
+ self.log.warning("No 'prediction' column found in DataFrame")
953
928
  return pd.DataFrame()
954
929
 
955
930
  # Check for NaN values in target or prediction columns
956
- if prediction_df[target_column].isnull().any() or prediction_df[prediction_col].isnull().any():
957
- # Compute the number of NaN values in each column
931
+ if prediction_df[target_column].isnull().any() or prediction_df["prediction"].isnull().any():
958
932
  num_nan_target = prediction_df[target_column].isnull().sum()
959
- num_nan_prediction = prediction_df[prediction_col].isnull().sum()
960
- self.log.warning(
961
- f"NaNs Found: {target_column} {num_nan_target} and {prediction_col}: {num_nan_prediction}."
962
- )
963
- self.log.warning(
964
- "NaN values found in target or prediction columns. Dropping NaN rows for metric computation."
965
- )
966
- prediction_df = prediction_df.dropna(subset=[target_column, prediction_col])
933
+ num_nan_prediction = prediction_df["prediction"].isnull().sum()
934
+ self.log.warning(f"NaNs Found: {target_column} {num_nan_target} and prediction: {num_nan_prediction}.")
935
+ self.log.warning("Dropping NaN rows for metric computation.")
936
+ prediction_df = prediction_df.dropna(subset=[target_column, "prediction"])
967
937
 
968
938
  # Compute the metrics
969
939
  try:
970
940
  y_true = prediction_df[target_column]
971
- y_pred = prediction_df[prediction_col]
941
+ y_pred = prediction_df["prediction"]
972
942
 
973
943
  mae = mean_absolute_error(y_true, y_pred)
974
944
  rmse = np.sqrt(mean_squared_error(y_true, y_pred))
@@ -1000,17 +970,13 @@ class EndpointCore(Artifact):
1000
970
  Returns:
1001
971
  pd.DataFrame: DataFrame with two new columns called 'residuals' and 'residuals_abs'
1002
972
  """
1003
-
1004
- # Compute the residuals
1005
- y_true = prediction_df[target_column]
1006
-
1007
- # Find the prediction column: "prediction" or "{target}_pred"
1008
- prediction_col = self._find_prediction_column(prediction_df, target_column)
1009
- if prediction_col is None:
1010
- self.log.warning(f"No prediction column found for target '{target_column}'. Cannot compute residuals.")
973
+ # Check for prediction column
974
+ if "prediction" not in prediction_df.columns:
975
+ self.log.warning("No 'prediction' column found. Cannot compute residuals.")
1011
976
  return prediction_df
1012
977
 
1013
- y_pred = prediction_df[prediction_col]
978
+ y_true = prediction_df[target_column]
979
+ y_pred = prediction_df["prediction"]
1014
980
 
1015
981
  # Check for classification scenario
1016
982
  if not pd.api.types.is_numeric_dtype(y_true) or not pd.api.types.is_numeric_dtype(y_pred):
@@ -1051,14 +1017,13 @@ class EndpointCore(Artifact):
1051
1017
  Returns:
1052
1018
  pd.DataFrame: DataFrame with the performance metrics
1053
1019
  """
1054
- # Find the prediction column: "prediction" or "{target}_pred"
1055
- prediction_col = self._find_prediction_column(prediction_df, target_column)
1056
- if prediction_col is None:
1057
- self.log.warning(f"No prediction column found for target '{target_column}'")
1020
+ # Check for prediction column
1021
+ if "prediction" not in prediction_df.columns:
1022
+ self.log.warning("No 'prediction' column found in DataFrame")
1058
1023
  return pd.DataFrame()
1059
1024
 
1060
1025
  # Drop rows with NaN predictions (can't compute metrics on missing predictions)
1061
- nan_mask = prediction_df[prediction_col].isna()
1026
+ nan_mask = prediction_df["prediction"].isna()
1062
1027
  if nan_mask.any():
1063
1028
  n_nan = nan_mask.sum()
1064
1029
  self.log.warning(f"Dropping {n_nan} rows with NaN predictions for metrics calculation")
@@ -1078,7 +1043,7 @@ class EndpointCore(Artifact):
1078
1043
  # Calculate precision, recall, f1, and support, handling zero division
1079
1044
  scores = precision_recall_fscore_support(
1080
1045
  prediction_df[target_column],
1081
- prediction_df[prediction_col],
1046
+ prediction_df["prediction"],
1082
1047
  average=None,
1083
1048
  labels=class_labels,
1084
1049
  zero_division=0,
@@ -1126,21 +1091,20 @@ class EndpointCore(Artifact):
1126
1091
  Returns:
1127
1092
  pd.DataFrame: DataFrame with the confusion matrix
1128
1093
  """
1129
- # Find the prediction column: "prediction" or "{target}_pred"
1130
- prediction_col = self._find_prediction_column(prediction_df, target_column)
1131
- if prediction_col is None:
1132
- self.log.warning(f"No prediction column found for target '{target_column}'")
1094
+ # Check for prediction column
1095
+ if "prediction" not in prediction_df.columns:
1096
+ self.log.warning("No 'prediction' column found in DataFrame")
1133
1097
  return pd.DataFrame()
1134
1098
 
1135
1099
  # Drop rows with NaN predictions (can't include in confusion matrix)
1136
- nan_mask = prediction_df[prediction_col].isna()
1100
+ nan_mask = prediction_df["prediction"].isna()
1137
1101
  if nan_mask.any():
1138
1102
  n_nan = nan_mask.sum()
1139
1103
  self.log.warning(f"Dropping {n_nan} rows with NaN predictions for confusion matrix")
1140
1104
  prediction_df = prediction_df[~nan_mask].copy()
1141
1105
 
1142
1106
  y_true = prediction_df[target_column]
1143
- y_pred = prediction_df[prediction_col]
1107
+ y_pred = prediction_df["prediction"]
1144
1108
 
1145
1109
  # Get model class labels
1146
1110
  model_class_labels = ModelCore(self.model_name).class_labels()
@@ -102,10 +102,21 @@ class ModelToEndpoint(Transform):
102
102
  # Is this a serverless deployment?
103
103
  serverless_config = None
104
104
  if self.serverless:
105
+ # For PyTorch or ChemProp we need at least 4GB of memory
106
+ from workbench.api import ModelFramework
107
+
108
+ self.log.info(f"Model Framework: {workbench_model.model_framework}")
109
+ if workbench_model.model_framework in [ModelFramework.PYTORCH_TABULAR, ModelFramework.CHEMPROP]:
110
+ if mem_size < 4096:
111
+ self.log.important(
112
+ f"{workbench_model.model_framework} needs at least 4GB of memory (setting to 4GB)"
113
+ )
114
+ mem_size = 4096
105
115
  serverless_config = ServerlessInferenceConfig(
106
116
  memory_size_in_mb=mem_size,
107
117
  max_concurrency=max_concurrency,
108
118
  )
119
+ self.log.important(f"Serverless Config: Memory={mem_size}MB, MaxConcurrency={max_concurrency}")
109
120
 
110
121
  # Configure data capture if requested (and not serverless)
111
122
  data_capture_config = None
@@ -25,6 +25,7 @@
25
25
  # - argparse, file loading, S3 writes
26
26
  # =============================
27
27
 
28
+ import glob
28
29
  import os
29
30
  import argparse
30
31
  import json
@@ -185,7 +186,7 @@ def build_mpnn_model(
185
186
  # Model hyperparameters with defaults
186
187
  hidden_dim = hyperparameters.get("hidden_dim", 700)
187
188
  depth = hyperparameters.get("depth", 6)
188
- dropout = hyperparameters.get("dropout", 0.25)
189
+ dropout = hyperparameters.get("dropout", 0.15)
189
190
  ffn_hidden_dim = hyperparameters.get("ffn_hidden_dim", 2000)
190
191
  ffn_num_layers = hyperparameters.get("ffn_num_layers", 2)
191
192
 
@@ -468,6 +469,11 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
468
469
  df.loc[valid_mask, f"{tc}_pred"] = preds[:, t_idx]
469
470
  df.loc[valid_mask, f"{tc}_pred_std"] = preds_std[:, t_idx]
470
471
 
472
+ # Add prediction/prediction_std aliases for first target
473
+ first_target = target_columns[0]
474
+ df["prediction"] = df[f"{first_target}_pred"]
475
+ df["prediction_std"] = df[f"{first_target}_pred_std"]
476
+
471
477
  return df
472
478
 
473
479
 
@@ -881,6 +887,11 @@ if __name__ == "__main__":
881
887
  else:
882
888
  df_val[f"{t_name}_pred_std"] = 0.0
883
889
 
890
+ # Add prediction/prediction_std aliases for first target
891
+ first_target = target_columns[0]
892
+ df_val["prediction"] = df_val[f"{first_target}_pred"]
893
+ df_val["prediction_std"] = df_val[f"{first_target}_pred_std"]
894
+
884
895
  # Save validation predictions to S3
885
896
  # Include id_column if it exists in df_val
886
897
  output_columns = []
@@ -890,6 +901,7 @@ if __name__ == "__main__":
890
901
  output_columns += target_columns
891
902
  output_columns += [f"{t}_pred" for t in target_columns]
892
903
  output_columns += [f"{t}_pred_std" for t in target_columns]
904
+ output_columns += ["prediction", "prediction_std"]
893
905
  # Add proba columns for classifiers
894
906
  output_columns += [col for col in df_val.columns if col.endswith("_proba")]
895
907
  # Filter to only columns that exist
@@ -906,6 +918,11 @@ if __name__ == "__main__":
906
918
  models.save_model(model_path, ens_model)
907
919
  print(f"Saved model {model_idx + 1} to {model_path}")
908
920
 
921
+ # Clean up checkpoint files (not needed for inference, reduces artifact size)
922
+ for ckpt_file in glob.glob(os.path.join(args.model_dir, "best_model_*.ckpt")):
923
+ os.remove(ckpt_file)
924
+ print(f"Removed checkpoint: {ckpt_file}")
925
+
909
926
  # Save ensemble metadata (n_ensemble = number of models for inference)
910
927
  n_ensemble = len(ensemble_models)
911
928
  ensemble_metadata = {
@@ -25,6 +25,7 @@
25
25
  # - argparse, file loading, S3 writes
26
26
  # =============================
27
27
 
28
+ import glob
28
29
  import os
29
30
  import argparse
30
31
  import json
@@ -53,12 +54,12 @@ from chemprop import data, models, nn
53
54
 
54
55
  # Template Parameters
55
56
  TEMPLATE_PARAMS = {
56
- "model_type": "regressor",
57
- "targets": ['logd', 'ksol', 'hlm_clint', 'mlm_clint', 'caco_2_papp_a_b', 'caco_2_efflux', 'mppb', 'mbpb', 'mgmb'], # List of target columns (single or multi-task)
58
- "feature_list": ['smiles'],
59
- "id_column": "molecule_name",
60
- "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/open-admet-chemprop-mt/training",
61
- "hyperparameters": {},
57
+ "model_type": "uq_regressor",
58
+ "targets": ['udm_asy_res_efflux_ratio'], # List of target columns (single or multi-task)
59
+ "feature_list": ['smiles', 'smr_vsa4', 'tpsa', 'nhohcount', 'mollogp', 'peoe_vsa1', 'smr_vsa3', 'nitrogen_span', 'numhdonors', 'minpartialcharge', 'vsa_estate3', 'vsa_estate6', 'tertiary_amine_count', 'hba_hbd_ratio', 'peoe_vsa8', 'estate_vsa4', 'xc_4dv', 'vsa_estate2', 'molmr', 'xp_2dv', 'mi', 'molecular_axis_length', 'vsa_estate4', 'xp_6dv', 'qed', 'estate_vsa8', 'chi1v', 'asphericity', 'axp_1d', 'bcut2d_logphi', 'kappa3', 'axp_7d', 'num_s_centers', 'amphiphilic_moment', 'molecular_asymmetry', 'charge_centroid_distance', 'estate_vsa3', 'vsa_estate8', 'aromatic_interaction_score', 'molecular_volume_3d', 'axp_7dv', 'peoe_vsa3', 'smr_vsa6', 'bcut2d_mrhi', 'radius_of_gyration', 'xpc_4dv', 'minabsestateindex', 'axp_0dv', 'chi4n', 'balabanj', 'bcut2d_mwlow'],
60
+ "id_column": "udm_mol_bat_id",
61
+ "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/caco2-er-chemprop-reg-hybrid/training",
62
+ "hyperparameters": {'n_folds': 5, 'hidden_dim': 700, 'depth': 6, 'dropout': 0.15, 'ffn_hidden_dim': 2000, 'ffn_num_layers': 2},
62
63
  }
63
64
 
64
65
 
@@ -185,7 +186,7 @@ def build_mpnn_model(
185
186
  # Model hyperparameters with defaults
186
187
  hidden_dim = hyperparameters.get("hidden_dim", 700)
187
188
  depth = hyperparameters.get("depth", 6)
188
- dropout = hyperparameters.get("dropout", 0.25)
189
+ dropout = hyperparameters.get("dropout", 0.15)
189
190
  ffn_hidden_dim = hyperparameters.get("ffn_hidden_dim", 2000)
190
191
  ffn_num_layers = hyperparameters.get("ffn_num_layers", 2)
191
192
 
@@ -468,6 +469,11 @@ def predict_fn(df: pd.DataFrame, model_dict: dict) -> pd.DataFrame:
468
469
  df.loc[valid_mask, f"{tc}_pred"] = preds[:, t_idx]
469
470
  df.loc[valid_mask, f"{tc}_pred_std"] = preds_std[:, t_idx]
470
471
 
472
+ # Add prediction/prediction_std aliases for first target
473
+ first_target = target_columns[0]
474
+ df["prediction"] = df[f"{first_target}_pred"]
475
+ df["prediction_std"] = df[f"{first_target}_pred_std"]
476
+
471
477
  return df
472
478
 
473
479
 
@@ -881,6 +887,11 @@ if __name__ == "__main__":
881
887
  else:
882
888
  df_val[f"{t_name}_pred_std"] = 0.0
883
889
 
890
+ # Add prediction/prediction_std aliases for first target
891
+ first_target = target_columns[0]
892
+ df_val["prediction"] = df_val[f"{first_target}_pred"]
893
+ df_val["prediction_std"] = df_val[f"{first_target}_pred_std"]
894
+
884
895
  # Save validation predictions to S3
885
896
  # Include id_column if it exists in df_val
886
897
  output_columns = []
@@ -890,6 +901,7 @@ if __name__ == "__main__":
890
901
  output_columns += target_columns
891
902
  output_columns += [f"{t}_pred" for t in target_columns]
892
903
  output_columns += [f"{t}_pred_std" for t in target_columns]
904
+ output_columns += ["prediction", "prediction_std"]
893
905
  # Add proba columns for classifiers
894
906
  output_columns += [col for col in df_val.columns if col.endswith("_proba")]
895
907
  # Filter to only columns that exist
@@ -906,6 +918,11 @@ if __name__ == "__main__":
906
918
  models.save_model(model_path, ens_model)
907
919
  print(f"Saved model {model_idx + 1} to {model_path}")
908
920
 
921
+ # Clean up checkpoint files (not needed for inference, reduces artifact size)
922
+ for ckpt_file in glob.glob(os.path.join(args.model_dir, "best_model_*.ckpt")):
923
+ os.remove(ckpt_file)
924
+ print(f"Removed checkpoint: {ckpt_file}")
925
+
909
926
  # Save ensemble metadata (n_ensemble = number of models for inference)
910
927
  n_ensemble = len(ensemble_models)
911
928
  ensemble_metadata = {
@@ -19,11 +19,11 @@ from typing import List, Tuple, Optional, Dict
19
19
 
20
20
  # Template Placeholders
21
21
  TEMPLATE_PARAMS = {
22
- "target": "mppb",
23
- "features": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo'],
22
+ "target": "udm_asy_res_efflux_ratio",
23
+ "features": ['smr_vsa4', 'tpsa', 'nhohcount', 'mollogp', 'peoe_vsa1', 'smr_vsa3', 'nitrogen_span', 'numhdonors', 'minpartialcharge', 'vsa_estate3', 'vsa_estate6', 'tertiary_amine_count', 'hba_hbd_ratio', 'peoe_vsa8', 'estate_vsa4', 'xc_4dv', 'vsa_estate2', 'molmr', 'xp_2dv', 'mi', 'molecular_axis_length', 'vsa_estate4', 'xp_6dv', 'qed', 'estate_vsa8', 'chi1v', 'asphericity', 'axp_1d', 'bcut2d_logphi', 'kappa3', 'axp_7d', 'num_s_centers', 'amphiphilic_moment', 'molecular_asymmetry', 'charge_centroid_distance', 'estate_vsa3', 'vsa_estate8', 'aromatic_interaction_score', 'molecular_volume_3d', 'axp_7dv', 'peoe_vsa3', 'smr_vsa6', 'bcut2d_mrhi', 'radius_of_gyration', 'xpc_4dv', 'minabsestateindex', 'axp_0dv', 'chi4n', 'balabanj', 'bcut2d_mwlow', 'estate_vsa2', 'axp_5d', 'maxestateindex', 'bcut2d_mrlow', 'type_ii_pattern_count', 'avgipc', 'slogp_vsa1', 'fr_nhpyrrole', 'xch_7d', 'axp_1dv', 'peoe_vsa9', 'xch_6d', 'xch_5dv', 'bcut2d_chglo', 'fpdensitymorgan1', 'fr_al_oh', 'axp_5dv', 'smr_vsa5', 'chi2v', 'estate_vsa6', 'smr_vsa9', 'minestateindex', 'bcut2d_logplow', 'c3sp3', 'xp_3d', 'vsa_estate9', 'nbase', 'peoe_vsa2', 'numatomstereocenters', 'xc_5dv', 'bcut2d_mwhi', 'nocount', 'slogp_vsa2', 'smr_vsa1', 'axp_6d', 'maxabspartialcharge', 'vsa_estate5', 'fpdensitymorgan2', 'xp_7d', 'peoe_vsa10', 'num_r_centers', 'mv', 'vsa_estate10', 'xp_0dv', 'axp_4d', 'fractioncsp3', 'smr_vsa10', 'xp_7dv', 'xp_4dv', 'hallkieralpha', 'numhacceptors', 'axp_3d', 'vsa_estate7', 'slogp_vsa3', 'peoe_vsa7', 'estate_vsa10', 'axp_2d', 'c1sp3', 'axp_2dv', 'slogp_vsa4', 'estate_vsa9', 'xch_7dv', 'sps', 'chi0n', 'axp_6dv', 'fr_imidazole', 'xpc_4d', 'bcut2d_chghi', 'chi3n', 'peoe_vsa11', 'xpc_6d', 'estate_vsa1', 'xch_6dv', 'chi3v', 'axp_3dv', 'xc_5d', 'slogp_vsa5', 'maxpartialcharge', 'estate_vsa5', 'fr_hoccn', 'heavyatommolwt', 'fr_ar_n', 'mz', 'xpc_5d', 'axp_4dv', 'xc_3dv', 'xp_6d', 'bertzct', 'peoe_vsa6', 'xc_3d', 'c2sp2', 'fpdensitymorgan3', 'xpc_5dv', 'intramolecular_hbond_potential', 'molwt', 'estate_vsa7', 'xp_5d', 'kappa1', 'xp_5dv', 'chi2n', 'axp_0d', 'xch_4dv', 'xp_4d', 'mp', 'chi1n', 'mm', 'fr_nh0', 'phi', 'labuteasa', 'xp_3dv', 'mse', 'xc_6dv', 'fr_piperzine', 'chi4v', 'xch_4d', 'fr_pyridine', 'xp_2d', 'num_stereocenters', 'minabspartialcharge', 'numaliphaticheterocycles', 'chi0v', 'type_i_pattern_count', 'fr_ketone_topliss', 'fr_ether', 'fr_priamide', 'num_defined_stereocenters', 'mare', 'peoe_vsa12', 'numheteroatoms', 'peoe_vsa4', 'peoe_vsa13', 'hybratio', 'numheterocycles', 'sse', 'fr_c_o_nocoo', 'fr_thiazole', 'slogp_vsa6', 'smr_vsa7', 'vsa_estate1', 'xch_5d', 'smr_vsa2', 'amide_count', 'fr_amide', 'chi1', 'fr_urea', 'fr_aniline', 'fr_aryl_methyl', 'fr_piperdine', 'numrotatablebonds', 'fr_nitrile', 'si', 'sp', 'fr_morpholine', 'numsaturatedheterocycles', 'kappa2', 'chi0', 'mpe', 'sare', 'numaromaticheterocycles', 'naromatom', 'xpc_6dv', 'fr_nh1', 'slogp_vsa10', 'numsaturatedrings', 'c2sp3', 'slogp_vsa8', 'c1sp2', 'fr_ndealkylation2', 'spe', 'xc_6d', 'slogp_vsa11', 'numaliphaticcarbocycles', 'frac_defined_stereo', 'numunspecifiedatomstereocenters', 'fr_benzene', 'xp_1d', 'xc_4d', 'fr_methoxy', 'c4sp3', 'numvalenceelectrons', 'fr_bicyclic', 'fr_imine', 'fr_sulfone', 'c3sp2', 'ringcount', 'slogp_vsa7', 'exactmolwt', 'fr_al_oh_notert', 'fr_tetrazole', 'peoe_vsa14', 'xch_3d', 'fr_para_hydroxylation', 'numspiroatoms'],
24
24
  "compressed_features": [],
25
25
  "train_all_data": True,
26
- "hyperparameters": {'objective': 'reg:absoluteerror', 'n_estimators': 300, 'max_depth': 6, 'learning_rate': 0.03, 'subsample': 0.8, 'colsample_bytree': 0.6, 'colsample_bylevel': 0.8, 'min_child_weight': 5, 'gamma': 0.1, 'reg_alpha': 0.3, 'reg_lambda': 1.5, 'random_state': 42},
26
+ "hyperparameters": {'n_estimators': 500, 'max_depth': 6, 'learning_rate': 0.04},
27
27
  }
28
28
 
29
29
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: workbench
3
- Version: 0.8.203
3
+ Version: 0.8.204
4
4
  Summary: Workbench: A Dashboard and Python API for creating and deploying AWS SageMaker Model Pipelines
5
5
  Author-email: SuperCowPowers LLC <support@supercowpowers.com>
6
6
  License: MIT License
@@ -55,7 +55,7 @@ workbench/core/artifacts/data_capture_core.py,sha256=q8f79rRTYiZ7T4IQRWXl8ZvPpcv
55
55
  workbench/core/artifacts/data_source_abstract.py,sha256=5IRCzFVK-17cd4NXPMRfx99vQAmQ0WHE5jcm5RfsVTg,10619
56
56
  workbench/core/artifacts/data_source_factory.py,sha256=YL_tA5fsgubbB3dPF6T4tO0rGgz-6oo3ge4i_YXVC-M,2380
57
57
  workbench/core/artifacts/df_store_core.py,sha256=AueNr_JvuLLu_ByE7cb3u-isH9u0Q7cMP-UCgCX-Ctg,3536
58
- workbench/core/artifacts/endpoint_core.py,sha256=oWWJSXSod5JzI7b4JvoxKWm46lv0FNZZf_FIZR4ZP9Q,60832
58
+ workbench/core/artifacts/endpoint_core.py,sha256=8HlZw4ZTsSIqYrCsbawfqrQMQo4Y9paiudq8ihN1Mzo,59317
59
59
  workbench/core/artifacts/feature_set_core.py,sha256=wZy-02WXWmSBet5t8mWXFRdv9O4MtW3hWqJuVv7Kok0,39330
60
60
  workbench/core/artifacts/model_core.py,sha256=QIgV5MJr8aDY63in83thdNc5-bzkWLn5f5vvsS4aNYo,52348
61
61
  workbench/core/artifacts/monitor_core.py,sha256=M307yz7tEzOEHgv-LmtVy9jKjSbM98fHW3ckmNYrwlU,27897
@@ -104,7 +104,7 @@ workbench/core/transforms/features_to_features/heavy/glue/Readme.md,sha256=TuyCa
104
104
  workbench/core/transforms/features_to_model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
105
  workbench/core/transforms/features_to_model/features_to_model.py,sha256=JdKKz3eKrKhicA1WxTfmb1IqQNCdHJE0CKDs66bLHYU,21071
106
106
  workbench/core/transforms/model_to_endpoint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=TIYXvuK0s383PwJ4iS6fCRhuif6oIxsoWb4CpMGJjY4,6358
107
+ workbench/core/transforms/model_to_endpoint/model_to_endpoint.py,sha256=QjfUY_Ay2-W8OszWw2vGtsKfnMY7VjiWQmnjuzLBITk,7020
108
108
  workbench/core/transforms/pandas_transforms/__init__.py,sha256=xL4MT8-fZ1SFqDbTLc8XyxjupHtB1YR6Ej0AC2nwd7I,894
109
109
  workbench/core/transforms/pandas_transforms/data_to_pandas.py,sha256=sJHPeuNF8Q8aQqgRnkdWkyvur5cbggdUVIwR-xF3Dlo,3621
110
110
  workbench/core/transforms/pandas_transforms/features_to_pandas.py,sha256=af6xdPt2V4zhh-SzQa_UYxdmNMzMLXbrbsznV5QoIJg,3441
@@ -123,8 +123,8 @@ workbench/core/views/view.py,sha256=DvmEA1xdvL980GET_cnbmHzqSy6IhlNaZcoQnVTtYis,
123
123
  workbench/core/views/view_utils.py,sha256=CwOlpqXpumCr6REi-ey7Qjz5_tpg-s4oWHmlOVu8POQ,12270
124
124
  workbench/core/views/storage/mdq_view.py,sha256=qf_ep1KwaXOIfO930laEwNIiCYP7VNOqjE3VdHfopRE,5195
125
125
  workbench/model_scripts/script_generation.py,sha256=_AhzM2qzjBuI7pIaXBRZ1YOOs2lwsKQGVM_ovL6T1bo,8135
126
- workbench/model_scripts/chemprop/chemprop.template,sha256=NR1jMb-IPxBAaQ-KiPR09ylL_gTIC35lZwBpBQPtzig,38109
127
- workbench/model_scripts/chemprop/generated_model_script.py,sha256=Cxfbu7mNf_HLBCzlsOOXR1u1Y-eHMma63YWM9l8ku44,38206
126
+ workbench/model_scripts/chemprop/chemprop.template,sha256=XcRBEz_JYS1Vjv9MI_5BalvrWL9v2vTq1eRlVpLAtPE,38883
127
+ workbench/model_scripts/chemprop/generated_model_script.py,sha256=lSr5qHZljCzttxlq4YwypUYmYbIAl7flo5RT8nXt_vs,39755
128
128
  workbench/model_scripts/chemprop/requirements.txt,sha256=PIuUdPAeDUH3I2M_5nIrCnCfs3FL1l9V5kzHqgCcu7s,281
129
129
  workbench/model_scripts/custom_models/chem_info/Readme.md,sha256=mH1lxJ4Pb7F5nBnVXaiuxpi8zS_yjUw_LBJepVKXhlA,574
130
130
  workbench/model_scripts/custom_models/chem_info/fingerprints.py,sha256=Qvs8jaUwguWUq3Q3j695MY0t0Wk3BvroW-oWBwalMUo,5255
@@ -157,7 +157,7 @@ workbench/model_scripts/pytorch_model/requirements.txt,sha256=ICS5nW0wix44EJO2tJ
157
157
  workbench/model_scripts/scikit_learn/generated_model_script.py,sha256=xhQIglpAgPRCH9iwI3wI0N0V6p9AgqW0mVOMuSXzUCk,17187
158
158
  workbench/model_scripts/scikit_learn/requirements.txt,sha256=aVvwiJ3LgBUhM_PyFlb2gHXu_kpGPho3ANBzlOkfcvs,107
159
159
  workbench/model_scripts/scikit_learn/scikit_learn.template,sha256=QQvqx-eX9ZTbYmyupq6R6vIQwosmsmY_MRBPaHyfjdk,12586
160
- workbench/model_scripts/uq_models/generated_model_script.py,sha256=caAXcK03XQQcPo2rvFJtZqnwQpLAz7v0CQWBWDO2Dts,27866
160
+ workbench/model_scripts/uq_models/generated_model_script.py,sha256=ivKtB-3MdJVnppxKez4Vz7jLW5i_sb0gLTGOcYJZ_PM,26758
161
161
  workbench/model_scripts/uq_models/mapie.template,sha256=on3I40D7zyNfvfqBf5k8VXCFtmepcxKmqVWCH5Q9S84,23432
162
162
  workbench/model_scripts/uq_models/requirements.txt,sha256=fw7T7t_YJAXK3T6Ysbesxh_Agx_tv0oYx72cEBTqRDY,98
163
163
  workbench/model_scripts/xgb_model/generated_model_script.py,sha256=qUGg5R-boaswzXtgKp_J7JPxFzMdRNv51QeF-lMWL-4,19334
@@ -291,9 +291,9 @@ workbench/web_interface/page_views/main_page.py,sha256=X4-KyGTKLAdxR-Zk2niuLJB2Y
291
291
  workbench/web_interface/page_views/models_page_view.py,sha256=M0bdC7bAzLyIaE2jviY12FF4abdMFZmg6sFuOY_LaGI,2650
292
292
  workbench/web_interface/page_views/page_view.py,sha256=Gh6YnpOGlUejx-bHZAf5pzqoQ1H1R0OSwOpGhOBO06w,455
293
293
  workbench/web_interface/page_views/pipelines_page_view.py,sha256=v2pxrIbsHBcYiblfius3JK766NZ7ciD2yPx0t3E5IJo,2656
294
- workbench-0.8.203.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
295
- workbench-0.8.203.dist-info/METADATA,sha256=qC58O-dE5_EMFpEJWDa9fyPSNwRt-n6K7krsrsJP13I,10500
296
- workbench-0.8.203.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
297
- workbench-0.8.203.dist-info/entry_points.txt,sha256=j02NCuno2Y_BuE4jEvw-IL73WZ9lkTpLwom29uKcLCw,458
298
- workbench-0.8.203.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
299
- workbench-0.8.203.dist-info/RECORD,,
294
+ workbench-0.8.204.dist-info/licenses/LICENSE,sha256=RTBoTMeEwTgEhS-n8vgQ-VUo5qig0PWVd8xFPKU6Lck,1080
295
+ workbench-0.8.204.dist-info/METADATA,sha256=M0chxsmVPAHxPCzxcL9fzH_nS3fu2Bc_DCkDGpaPyeU,10500
296
+ workbench-0.8.204.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
297
+ workbench-0.8.204.dist-info/entry_points.txt,sha256=j02NCuno2Y_BuE4jEvw-IL73WZ9lkTpLwom29uKcLCw,458
298
+ workbench-0.8.204.dist-info/top_level.txt,sha256=Dhy72zTxaA_o_yRkPZx5zw-fwumnjGaeGf0hBN3jc_w,10
299
+ workbench-0.8.204.dist-info/RECORD,,