workbench 0.8.160__py3-none-any.whl → 0.8.202__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of workbench might be problematic. Click here for more details.

Files changed (114)
  1. workbench/algorithms/dataframe/__init__.py +1 -2
  2. workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
  3. workbench/algorithms/dataframe/proximity.py +261 -235
  4. workbench/algorithms/graph/light/proximity_graph.py +10 -8
  5. workbench/api/__init__.py +2 -1
  6. workbench/api/compound.py +1 -1
  7. workbench/api/endpoint.py +11 -0
  8. workbench/api/feature_set.py +12 -8
  9. workbench/api/meta.py +5 -2
  10. workbench/api/model.py +16 -15
  11. workbench/api/monitor.py +1 -16
  12. workbench/api/parameter_store.py +5 -0
  13. workbench/core/artifacts/__init__.py +11 -2
  14. workbench/core/artifacts/artifact.py +11 -3
  15. workbench/core/artifacts/data_capture_core.py +355 -0
  16. workbench/core/artifacts/endpoint_core.py +256 -118
  17. workbench/core/artifacts/feature_set_core.py +265 -16
  18. workbench/core/artifacts/model_core.py +110 -63
  19. workbench/core/artifacts/monitor_core.py +33 -248
  20. workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
  21. workbench/core/cloud_platform/aws/aws_meta.py +12 -5
  22. workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
  23. workbench/core/cloud_platform/aws/aws_session.py +4 -4
  24. workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
  25. workbench/core/transforms/features_to_model/features_to_model.py +45 -33
  26. workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
  27. workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
  28. workbench/core/views/training_view.py +113 -42
  29. workbench/core/views/view.py +53 -3
  30. workbench/core/views/view_utils.py +4 -4
  31. workbench/model_scripts/chemprop/chemprop.template +852 -0
  32. workbench/model_scripts/chemprop/generated_model_script.py +852 -0
  33. workbench/model_scripts/chemprop/requirements.txt +11 -0
  34. workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
  35. workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
  36. workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
  37. workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
  38. workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
  39. workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
  40. workbench/model_scripts/custom_models/proximity/proximity.py +261 -235
  41. workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
  42. workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
  43. workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
  44. workbench/model_scripts/custom_models/uq_models/meta_uq.template +166 -62
  45. workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
  46. workbench/model_scripts/custom_models/uq_models/proximity.py +261 -235
  47. workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
  48. workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
  49. workbench/model_scripts/pytorch_model/generated_model_script.py +390 -188
  50. workbench/model_scripts/pytorch_model/pytorch.template +387 -176
  51. workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
  52. workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
  53. workbench/model_scripts/script_generation.py +19 -10
  54. workbench/model_scripts/uq_models/generated_model_script.py +605 -0
  55. workbench/model_scripts/uq_models/mapie.template +605 -0
  56. workbench/model_scripts/uq_models/requirements.txt +1 -0
  57. workbench/model_scripts/xgb_model/generated_model_script.py +37 -46
  58. workbench/model_scripts/xgb_model/xgb_model.template +44 -46
  59. workbench/repl/workbench_shell.py +28 -14
  60. workbench/scripts/endpoint_test.py +162 -0
  61. workbench/scripts/lambda_test.py +73 -0
  62. workbench/scripts/ml_pipeline_batch.py +137 -0
  63. workbench/scripts/ml_pipeline_sqs.py +186 -0
  64. workbench/scripts/monitor_cloud_watch.py +20 -100
  65. workbench/utils/aws_utils.py +4 -3
  66. workbench/utils/chem_utils/__init__.py +0 -0
  67. workbench/utils/chem_utils/fingerprints.py +134 -0
  68. workbench/utils/chem_utils/misc.py +194 -0
  69. workbench/utils/chem_utils/mol_descriptors.py +483 -0
  70. workbench/utils/chem_utils/mol_standardize.py +450 -0
  71. workbench/utils/chem_utils/mol_tagging.py +348 -0
  72. workbench/utils/chem_utils/projections.py +209 -0
  73. workbench/utils/chem_utils/salts.py +256 -0
  74. workbench/utils/chem_utils/sdf.py +292 -0
  75. workbench/utils/chem_utils/toxicity.py +250 -0
  76. workbench/utils/chem_utils/vis.py +253 -0
  77. workbench/utils/chemprop_utils.py +760 -0
  78. workbench/utils/cloudwatch_handler.py +1 -1
  79. workbench/utils/cloudwatch_utils.py +137 -0
  80. workbench/utils/config_manager.py +3 -7
  81. workbench/utils/endpoint_utils.py +5 -7
  82. workbench/utils/license_manager.py +2 -6
  83. workbench/utils/model_utils.py +95 -34
  84. workbench/utils/monitor_utils.py +44 -62
  85. workbench/utils/pandas_utils.py +3 -3
  86. workbench/utils/pytorch_utils.py +526 -0
  87. workbench/utils/shap_utils.py +10 -2
  88. workbench/utils/workbench_logging.py +0 -3
  89. workbench/utils/workbench_sqs.py +1 -1
  90. workbench/utils/xgboost_model_utils.py +371 -156
  91. workbench/web_interface/components/model_plot.py +7 -1
  92. workbench/web_interface/components/plugin_unit_test.py +5 -2
  93. workbench/web_interface/components/plugins/dashboard_status.py +3 -1
  94. workbench/web_interface/components/plugins/generated_compounds.py +1 -1
  95. workbench/web_interface/components/plugins/model_details.py +9 -7
  96. workbench/web_interface/components/plugins/scatter_plot.py +3 -3
  97. {workbench-0.8.160.dist-info → workbench-0.8.202.dist-info}/METADATA +27 -6
  98. {workbench-0.8.160.dist-info → workbench-0.8.202.dist-info}/RECORD +102 -86
  99. {workbench-0.8.160.dist-info → workbench-0.8.202.dist-info}/entry_points.txt +4 -0
  100. {workbench-0.8.160.dist-info → workbench-0.8.202.dist-info}/licenses/LICENSE +1 -1
  101. workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
  102. workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
  103. workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
  104. workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
  105. workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
  106. workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
  107. workbench/model_scripts/quant_regression/quant_regression.template +0 -279
  108. workbench/model_scripts/quant_regression/requirements.txt +0 -1
  109. workbench/utils/chem_utils.py +0 -1556
  110. workbench/utils/execution_environment.py +0 -211
  111. workbench/utils/fast_inference.py +0 -167
  112. workbench/utils/resource_utils.py +0 -39
  113. {workbench-0.8.160.dist-info → workbench-0.8.202.dist-info}/WHEEL +0 -0
  114. {workbench-0.8.160.dist-info → workbench-0.8.202.dist-info}/top_level.txt +0 -0
@@ -4,11 +4,11 @@ None
4
4
  # Template Placeholders
5
5
  TEMPLATE_PARAMS = {
6
6
  "model_type": "regressor",
7
- "target_column": "solubility",
8
- "feature_list": ['molwt', 'mollogp', 'molmr', 'heavyatomcount', 'numhacceptors', 'numhdonors', 'numheteroatoms', 'numrotatablebonds', 'numvalenceelectrons', 'numaromaticrings', 'numsaturatedrings', 'numaliphaticrings', 'ringcount', 'tpsa', 'labuteasa', 'balabanj', 'bertzct'],
7
+ "target_column": "udm_asy_res_efflux_ratio",
8
+ "feature_list": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 
'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 
'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo', 'tertiary_amine_count', 'type_i_pattern_count', 'type_ii_pattern_count', 'aromatic_interaction_score', 'molecular_axis_length', 'molecular_asymmetry', 'molecular_volume_3d', 'radius_of_gyration', 'asphericity', 'charge_centroid_distance', 'nitrogen_span', 'amide_count', 'hba_hbd_ratio', 'intramolecular_hbond_potential', 'amphiphilic_moment'],
9
9
  "model_class": PyTorch,
10
- "model_metrics_s3_path": "s3://sandbox-sageworks-artifacts/models/aqsol-pytorch-reg/training",
11
- "train_all_data": False
10
+ "model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/caco2-er-reg-pytorch-test/training",
11
+ "train_all_data": False,
12
12
  }
13
13
 
14
14
  import awswrangler as wr
@@ -99,10 +99,7 @@ if __name__ == "__main__":
99
99
  args = parser.parse_args()
100
100
 
101
101
  # Load training data from the specified directory
102
- training_files = [
103
- os.path.join(args.train, file)
104
- for file in os.listdir(args.train) if file.endswith(".csv")
105
- ]
102
+ training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
106
103
  all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
107
104
 
108
105
  # Check if the DataFrame is empty
@@ -116,10 +113,7 @@ if __name__ == "__main__":
116
113
 
117
114
  if needs_standardization:
118
115
  # Create a pipeline with standardization and the model
119
- model = Pipeline([
120
- ("scaler", StandardScaler()),
121
- ("model", model)
122
- ])
116
+ model = Pipeline([("scaler", StandardScaler()), ("model", model)])
123
117
 
124
118
  # Handle logic based on the model_type
125
119
  if model_type in ["classifier", "regressor"]:
@@ -206,6 +200,7 @@ if __name__ == "__main__":
206
200
  with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
207
201
  json.dump(feature_list, fp)
208
202
 
203
+
209
204
  #
210
205
  # Inference Section
211
206
  #
@@ -8,7 +8,7 @@ TEMPLATE_PARAMS = {
8
8
  "feature_list": "{{feature_list}}",
9
9
  "model_class": "{{model_class}}",
10
10
  "model_metrics_s3_path": "{{model_metrics_s3_path}}",
11
- "train_all_data": "{{train_all_data}}"
11
+ "train_all_data": "{{train_all_data}}",
12
12
  }
13
13
 
14
14
  import awswrangler as wr
@@ -99,10 +99,7 @@ if __name__ == "__main__":
99
99
  args = parser.parse_args()
100
100
 
101
101
  # Load training data from the specified directory
102
- training_files = [
103
- os.path.join(args.train, file)
104
- for file in os.listdir(args.train) if file.endswith(".csv")
105
- ]
102
+ training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
106
103
  all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
107
104
 
108
105
  # Check if the DataFrame is empty
@@ -116,10 +113,7 @@ if __name__ == "__main__":
116
113
 
117
114
  if needs_standardization:
118
115
  # Create a pipeline with standardization and the model
119
- model = Pipeline([
120
- ("scaler", StandardScaler()),
121
- ("model", model)
122
- ])
116
+ model = Pipeline([("scaler", StandardScaler()), ("model", model)])
123
117
 
124
118
  # Handle logic based on the model_type
125
119
  if model_type in ["classifier", "regressor"]:
@@ -206,6 +200,7 @@ if __name__ == "__main__":
206
200
  with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
207
201
  json.dump(feature_list, fp)
208
202
 
203
+
209
204
  #
210
205
  # Inference Section
211
206
  #
@@ -68,8 +68,13 @@ def fill_template(template_path: str, params: dict, output_script: str) -> str:
68
68
  template = template.replace(placeholder, str(value))
69
69
 
70
70
  # Sanity check to ensure all placeholders were replaced
71
- if "{{" in template or "}}" in template:
71
+ if "{{" in template and "}}" in template:
72
72
  msg = "Not all template placeholders were replaced. Please check your params."
73
+
74
+ # Show which placeholders are still present
75
+ start = template.index("{{")
76
+ end = template.index("}}", start) + 2
77
+ msg += f" Unreplaced placeholder: {template[start:end]}"
73
78
  log.critical(msg)
74
79
  raise ValueError(msg)
75
80
 
@@ -88,31 +93,35 @@ def generate_model_script(template_params: dict) -> str:
88
93
  template_params (dict): Dictionary containing the parameters:
89
94
  - model_imports (str): Import string for the model class
90
95
  - model_type (ModelType): The enumerated type of model to generate
96
+ - model_framework (str): The enumerated model framework to use
91
97
  - model_class (str): The model class to use (e.g., "RandomForestRegressor")
92
98
  - target_column (str): Column name of the target variable
93
99
  - feature_list (list[str]): A list of columns for the features
94
100
  - model_metrics_s3_path (str): The S3 path to store the model metrics
95
101
  - train_all_data (bool): Whether to train on all (100%) of the data
102
+ - hyperparameters (dict, optional): Hyperparameters for the model (default: None)
96
103
 
97
104
  Returns:
98
105
  str: The name of the generated model script
99
106
  """
100
- from workbench.api import ModelType # Avoid circular import
107
+ from workbench.api import ModelType, ModelFramework # Avoid circular import
101
108
 
102
109
  # Determine which template to use based on model type
103
110
  if template_params.get("model_class"):
104
- if template_params["model_class"].lower() == "pytorch":
105
- template_name = "pytorch.template"
106
- model_script_dir = "pytorch_model"
107
- else:
108
- template_name = "scikit_learn.template"
109
- model_script_dir = "scikit_learn"
111
+ template_name = "scikit_learn.template"
112
+ model_script_dir = "scikit_learn"
113
+ elif template_params["model_framework"] == ModelFramework.PYTORCH_TABULAR:
114
+ template_name = "pytorch.template"
115
+ model_script_dir = "pytorch_model"
116
+ elif template_params["model_framework"] == ModelFramework.CHEMPROP:
117
+ template_name = "chemprop.template"
118
+ model_script_dir = "chemprop"
110
119
  elif template_params["model_type"] in [ModelType.REGRESSOR, ModelType.CLASSIFIER]:
111
120
  template_name = "xgb_model.template"
112
121
  model_script_dir = "xgb_model"
113
122
  elif template_params["model_type"] == ModelType.UQ_REGRESSOR:
114
- template_name = "quant_regression.template"
115
- model_script_dir = "quant_regression"
123
+ template_name = "mapie.template"
124
+ model_script_dir = "uq_models"
116
125
  elif template_params["model_type"] == ModelType.ENSEMBLE_REGRESSOR:
117
126
  template_name = "ensemble_xgb.template"
118
127
  model_script_dir = "ensemble_xgb"