workbench 0.8.162__py3-none-any.whl → 0.8.202__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of workbench might be problematic. Click here for more details.
- workbench/algorithms/dataframe/__init__.py +1 -2
- workbench/algorithms/dataframe/fingerprint_proximity.py +2 -2
- workbench/algorithms/dataframe/proximity.py +261 -235
- workbench/algorithms/graph/light/proximity_graph.py +10 -8
- workbench/api/__init__.py +2 -1
- workbench/api/compound.py +1 -1
- workbench/api/endpoint.py +11 -0
- workbench/api/feature_set.py +11 -8
- workbench/api/meta.py +5 -2
- workbench/api/model.py +16 -15
- workbench/api/monitor.py +1 -16
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/artifact.py +11 -3
- workbench/core/artifacts/data_capture_core.py +355 -0
- workbench/core/artifacts/endpoint_core.py +256 -118
- workbench/core/artifacts/feature_set_core.py +265 -16
- workbench/core/artifacts/model_core.py +107 -60
- workbench/core/artifacts/monitor_core.py +33 -248
- workbench/core/cloud_platform/aws/aws_account_clamp.py +50 -1
- workbench/core/cloud_platform/aws/aws_meta.py +12 -5
- workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
- workbench/core/cloud_platform/aws/aws_session.py +4 -4
- workbench/core/transforms/data_to_features/light/molecular_descriptors.py +4 -4
- workbench/core/transforms/features_to_model/features_to_model.py +42 -32
- workbench/core/transforms/model_to_endpoint/model_to_endpoint.py +36 -6
- workbench/core/transforms/pandas_transforms/pandas_to_features.py +27 -0
- workbench/core/views/training_view.py +113 -42
- workbench/core/views/view.py +53 -3
- workbench/core/views/view_utils.py +4 -4
- workbench/model_scripts/chemprop/chemprop.template +852 -0
- workbench/model_scripts/chemprop/generated_model_script.py +852 -0
- workbench/model_scripts/chemprop/requirements.txt +11 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
- workbench/model_scripts/custom_models/chem_info/mol_descriptors.py +483 -0
- workbench/model_scripts/custom_models/chem_info/mol_standardize.py +450 -0
- workbench/model_scripts/custom_models/chem_info/molecular_descriptors.py +7 -9
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/feature_space_proximity.template +3 -5
- workbench/model_scripts/custom_models/proximity/proximity.py +261 -235
- workbench/model_scripts/custom_models/uq_models/bayesian_ridge.template +7 -8
- workbench/model_scripts/custom_models/uq_models/ensemble_xgb.template +20 -21
- workbench/model_scripts/custom_models/uq_models/gaussian_process.template +5 -11
- workbench/model_scripts/custom_models/uq_models/meta_uq.template +166 -62
- workbench/model_scripts/custom_models/uq_models/ngboost.template +30 -18
- workbench/model_scripts/custom_models/uq_models/proximity.py +261 -235
- workbench/model_scripts/custom_models/uq_models/requirements.txt +1 -3
- workbench/model_scripts/ensemble_xgb/ensemble_xgb.template +15 -17
- workbench/model_scripts/pytorch_model/generated_model_script.py +373 -190
- workbench/model_scripts/pytorch_model/pytorch.template +370 -187
- workbench/model_scripts/scikit_learn/generated_model_script.py +7 -12
- workbench/model_scripts/scikit_learn/scikit_learn.template +4 -9
- workbench/model_scripts/script_generation.py +17 -9
- workbench/model_scripts/uq_models/generated_model_script.py +605 -0
- workbench/model_scripts/uq_models/mapie.template +605 -0
- workbench/model_scripts/uq_models/requirements.txt +1 -0
- workbench/model_scripts/xgb_model/generated_model_script.py +37 -46
- workbench/model_scripts/xgb_model/xgb_model.template +44 -46
- workbench/repl/workbench_shell.py +28 -14
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/lambda_test.py +73 -0
- workbench/scripts/ml_pipeline_batch.py +137 -0
- workbench/scripts/ml_pipeline_sqs.py +186 -0
- workbench/scripts/monitor_cloud_watch.py +20 -100
- workbench/utils/aws_utils.py +4 -3
- workbench/utils/chem_utils/__init__.py +0 -0
- workbench/utils/chem_utils/fingerprints.py +134 -0
- workbench/utils/chem_utils/misc.py +194 -0
- workbench/utils/chem_utils/mol_descriptors.py +483 -0
- workbench/utils/chem_utils/mol_standardize.py +450 -0
- workbench/utils/chem_utils/mol_tagging.py +348 -0
- workbench/utils/chem_utils/projections.py +209 -0
- workbench/utils/chem_utils/salts.py +256 -0
- workbench/utils/chem_utils/sdf.py +292 -0
- workbench/utils/chem_utils/toxicity.py +250 -0
- workbench/utils/chem_utils/vis.py +253 -0
- workbench/utils/chemprop_utils.py +760 -0
- workbench/utils/cloudwatch_handler.py +1 -1
- workbench/utils/cloudwatch_utils.py +137 -0
- workbench/utils/config_manager.py +3 -7
- workbench/utils/endpoint_utils.py +5 -7
- workbench/utils/license_manager.py +2 -6
- workbench/utils/model_utils.py +95 -34
- workbench/utils/monitor_utils.py +44 -62
- workbench/utils/pandas_utils.py +3 -3
- workbench/utils/pytorch_utils.py +526 -0
- workbench/utils/shap_utils.py +10 -2
- workbench/utils/workbench_logging.py +0 -3
- workbench/utils/workbench_sqs.py +1 -1
- workbench/utils/xgboost_model_utils.py +371 -156
- workbench/web_interface/components/model_plot.py +7 -1
- workbench/web_interface/components/plugin_unit_test.py +5 -2
- workbench/web_interface/components/plugins/dashboard_status.py +3 -1
- workbench/web_interface/components/plugins/generated_compounds.py +1 -1
- workbench/web_interface/components/plugins/model_details.py +9 -7
- workbench/web_interface/components/plugins/scatter_plot.py +3 -3
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/METADATA +27 -6
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/RECORD +101 -85
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/entry_points.txt +4 -0
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/licenses/LICENSE +1 -1
- workbench/model_scripts/custom_models/chem_info/local_utils.py +0 -769
- workbench/model_scripts/custom_models/chem_info/tautomerize.py +0 -83
- workbench/model_scripts/custom_models/proximity/generated_model_script.py +0 -138
- workbench/model_scripts/custom_models/uq_models/generated_model_script.py +0 -393
- workbench/model_scripts/custom_models/uq_models/mapie_xgb.template +0 -203
- workbench/model_scripts/ensemble_xgb/generated_model_script.py +0 -279
- workbench/model_scripts/quant_regression/quant_regression.template +0 -279
- workbench/model_scripts/quant_regression/requirements.txt +0 -1
- workbench/utils/chem_utils.py +0 -1556
- workbench/utils/execution_environment.py +0 -211
- workbench/utils/fast_inference.py +0 -167
- workbench/utils/resource_utils.py +0 -39
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/WHEEL +0 -0
- {workbench-0.8.162.dist-info → workbench-0.8.202.dist-info}/top_level.txt +0 -0
|
@@ -4,11 +4,11 @@ None
|
|
|
4
4
|
# Template Placeholders
|
|
5
5
|
TEMPLATE_PARAMS = {
|
|
6
6
|
"model_type": "regressor",
|
|
7
|
-
"target_column": "
|
|
8
|
-
"feature_list": ['molwt', '
|
|
7
|
+
"target_column": "udm_asy_res_efflux_ratio",
|
|
8
|
+
"feature_list": ['chi2v', 'fr_sulfone', 'chi1v', 'bcut2d_logplow', 'fr_piperzine', 'kappa3', 'smr_vsa1', 'slogp_vsa5', 'fr_ketone_topliss', 'fr_sulfonamd', 'fr_imine', 'fr_benzene', 'fr_ester', 'chi2n', 'labuteasa', 'peoe_vsa2', 'smr_vsa6', 'bcut2d_chglo', 'fr_sh', 'peoe_vsa1', 'fr_allylic_oxid', 'chi4n', 'fr_ar_oh', 'fr_nh0', 'fr_term_acetylene', 'slogp_vsa7', 'slogp_vsa4', 'estate_vsa1', 'vsa_estate4', 'numbridgeheadatoms', 'numheterocycles', 'fr_ketone', 'fr_morpholine', 'fr_guanido', 'estate_vsa2', 'numheteroatoms', 'fr_nitro_arom_nonortho', 'fr_piperdine', 'nocount', 'numspiroatoms', 'fr_aniline', 'fr_thiophene', 'slogp_vsa10', 'fr_amide', 'slogp_vsa2', 'fr_epoxide', 'vsa_estate7', 'fr_ar_coo', 'fr_imidazole', 'fr_nitrile', 'fr_oxazole', 'numsaturatedrings', 'fr_pyridine', 'fr_hoccn', 'fr_ndealkylation1', 'numaliphaticheterocycles', 'fr_phenol', 'maxpartialcharge', 'vsa_estate5', 'peoe_vsa13', 'minpartialcharge', 'qed', 'fr_al_oh', 'slogp_vsa11', 'chi0n', 'fr_bicyclic', 'peoe_vsa12', 'fpdensitymorgan1', 'fr_oxime', 'molwt', 'fr_dihydropyridine', 'smr_vsa5', 'peoe_vsa5', 'fr_nitro', 'hallkieralpha', 'heavyatommolwt', 'fr_alkyl_halide', 'peoe_vsa8', 'fr_nhpyrrole', 'fr_isocyan', 'bcut2d_chghi', 'fr_lactam', 'peoe_vsa11', 'smr_vsa9', 'tpsa', 'chi4v', 'slogp_vsa1', 'phi', 'bcut2d_logphi', 'avgipc', 'estate_vsa11', 'fr_coo', 'bcut2d_mwhi', 'numunspecifiedatomstereocenters', 'vsa_estate10', 'estate_vsa8', 'numvalenceelectrons', 'fr_nh2', 'fr_lactone', 'vsa_estate1', 'estate_vsa4', 'numatomstereocenters', 'vsa_estate8', 'fr_para_hydroxylation', 'peoe_vsa3', 'fr_thiazole', 'peoe_vsa10', 'fr_ndealkylation2', 'slogp_vsa12', 'peoe_vsa9', 'maxestateindex', 'fr_quatn', 'smr_vsa7', 'minestateindex', 'numaromaticheterocycles', 'numrotatablebonds', 'fr_ar_nh', 'fr_ether', 'exactmolwt', 'fr_phenol_noorthohbond', 'slogp_vsa3', 'fr_ar_n', 'sps', 'fr_c_o_nocoo', 'bertzct', 'peoe_vsa7', 'slogp_vsa8', 'numradicalelectrons', 'molmr', 'fr_tetrazole', 'numsaturatedcarbocycles', 
'bcut2d_mrhi', 'kappa1', 'numamidebonds', 'fpdensitymorgan2', 'smr_vsa8', 'chi1n', 'estate_vsa6', 'fr_barbitur', 'fr_diazo', 'kappa2', 'chi0', 'bcut2d_mrlow', 'balabanj', 'peoe_vsa4', 'numhacceptors', 'fr_sulfide', 'chi3n', 'smr_vsa2', 'fr_al_oh_notert', 'fr_benzodiazepine', 'fr_phos_ester', 'fr_aldehyde', 'fr_coo2', 'estate_vsa5', 'fr_prisulfonamd', 'numaromaticcarbocycles', 'fr_unbrch_alkane', 'fr_urea', 'fr_nitroso', 'smr_vsa10', 'fr_c_s', 'smr_vsa3', 'fr_methoxy', 'maxabspartialcharge', 'slogp_vsa9', 'heavyatomcount', 'fr_azide', 'chi3v', 'smr_vsa4', 'mollogp', 'chi0v', 'fr_aryl_methyl', 'fr_nh1', 'fpdensitymorgan3', 'fr_furan', 'fr_hdrzine', 'fr_arn', 'numaromaticrings', 'vsa_estate3', 'fr_azo', 'fr_halogen', 'estate_vsa9', 'fr_hdrzone', 'numhdonors', 'fr_alkyl_carbamate', 'fr_isothiocyan', 'minabspartialcharge', 'fr_al_coo', 'ringcount', 'chi1', 'estate_vsa7', 'fr_nitro_arom', 'vsa_estate9', 'minabsestateindex', 'maxabsestateindex', 'vsa_estate6', 'estate_vsa10', 'estate_vsa3', 'fr_n_o', 'fr_amidine', 'fr_thiocyan', 'fr_phos_acid', 'fr_c_o', 'fr_imide', 'numaliphaticrings', 'peoe_vsa6', 'vsa_estate2', 'nhohcount', 'numsaturatedheterocycles', 'slogp_vsa6', 'peoe_vsa14', 'fractioncsp3', 'bcut2d_mwlow', 'numaliphaticcarbocycles', 'fr_priamide', 'nacid', 'nbase', 'naromatom', 'narombond', 'sz', 'sm', 'sv', 'sse', 'spe', 'sare', 'sp', 'si', 'mz', 'mm', 'mv', 'mse', 'mpe', 'mare', 'mp', 'mi', 'xch_3d', 'xch_4d', 'xch_5d', 'xch_6d', 'xch_7d', 'xch_3dv', 'xch_4dv', 'xch_5dv', 'xch_6dv', 'xch_7dv', 'xc_3d', 'xc_4d', 'xc_5d', 'xc_6d', 'xc_3dv', 'xc_4dv', 'xc_5dv', 'xc_6dv', 'xpc_4d', 'xpc_5d', 'xpc_6d', 'xpc_4dv', 'xpc_5dv', 'xpc_6dv', 'xp_0d', 'xp_1d', 'xp_2d', 'xp_3d', 'xp_4d', 'xp_5d', 'xp_6d', 'xp_7d', 'axp_0d', 'axp_1d', 'axp_2d', 'axp_3d', 'axp_4d', 'axp_5d', 'axp_6d', 'axp_7d', 'xp_0dv', 'xp_1dv', 'xp_2dv', 'xp_3dv', 'xp_4dv', 'xp_5dv', 'xp_6dv', 'xp_7dv', 'axp_0dv', 'axp_1dv', 'axp_2dv', 'axp_3dv', 'axp_4dv', 'axp_5dv', 'axp_6dv', 'axp_7dv', 'c1sp1', 'c2sp1', 
'c1sp2', 'c2sp2', 'c3sp2', 'c1sp3', 'c2sp3', 'c3sp3', 'c4sp3', 'hybratio', 'fcsp3', 'num_stereocenters', 'num_unspecified_stereocenters', 'num_defined_stereocenters', 'num_r_centers', 'num_s_centers', 'num_stereobonds', 'num_e_bonds', 'num_z_bonds', 'stereo_complexity', 'frac_defined_stereo', 'tertiary_amine_count', 'type_i_pattern_count', 'type_ii_pattern_count', 'aromatic_interaction_score', 'molecular_axis_length', 'molecular_asymmetry', 'molecular_volume_3d', 'radius_of_gyration', 'asphericity', 'charge_centroid_distance', 'nitrogen_span', 'amide_count', 'hba_hbd_ratio', 'intramolecular_hbond_potential', 'amphiphilic_moment'],
|
|
9
9
|
"model_class": PyTorch,
|
|
10
|
-
"model_metrics_s3_path": "s3://
|
|
11
|
-
"train_all_data": False
|
|
10
|
+
"model_metrics_s3_path": "s3://ideaya-sageworks-bucket/models/caco2-er-reg-pytorch-test/training",
|
|
11
|
+
"train_all_data": False,
|
|
12
12
|
}
|
|
13
13
|
|
|
14
14
|
import awswrangler as wr
|
|
@@ -99,10 +99,7 @@ if __name__ == "__main__":
|
|
|
99
99
|
args = parser.parse_args()
|
|
100
100
|
|
|
101
101
|
# Load training data from the specified directory
|
|
102
|
-
training_files = [
|
|
103
|
-
os.path.join(args.train, file)
|
|
104
|
-
for file in os.listdir(args.train) if file.endswith(".csv")
|
|
105
|
-
]
|
|
102
|
+
training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
|
|
106
103
|
all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
|
|
107
104
|
|
|
108
105
|
# Check if the DataFrame is empty
|
|
@@ -116,10 +113,7 @@ if __name__ == "__main__":
|
|
|
116
113
|
|
|
117
114
|
if needs_standardization:
|
|
118
115
|
# Create a pipeline with standardization and the model
|
|
119
|
-
model = Pipeline([
|
|
120
|
-
("scaler", StandardScaler()),
|
|
121
|
-
("model", model)
|
|
122
|
-
])
|
|
116
|
+
model = Pipeline([("scaler", StandardScaler()), ("model", model)])
|
|
123
117
|
|
|
124
118
|
# Handle logic based on the model_type
|
|
125
119
|
if model_type in ["classifier", "regressor"]:
|
|
@@ -206,6 +200,7 @@ if __name__ == "__main__":
|
|
|
206
200
|
with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
|
|
207
201
|
json.dump(feature_list, fp)
|
|
208
202
|
|
|
203
|
+
|
|
209
204
|
#
|
|
210
205
|
# Inference Section
|
|
211
206
|
#
|
|
@@ -8,7 +8,7 @@ TEMPLATE_PARAMS = {
|
|
|
8
8
|
"feature_list": "{{feature_list}}",
|
|
9
9
|
"model_class": "{{model_class}}",
|
|
10
10
|
"model_metrics_s3_path": "{{model_metrics_s3_path}}",
|
|
11
|
-
"train_all_data": "{{train_all_data}}"
|
|
11
|
+
"train_all_data": "{{train_all_data}}",
|
|
12
12
|
}
|
|
13
13
|
|
|
14
14
|
import awswrangler as wr
|
|
@@ -99,10 +99,7 @@ if __name__ == "__main__":
|
|
|
99
99
|
args = parser.parse_args()
|
|
100
100
|
|
|
101
101
|
# Load training data from the specified directory
|
|
102
|
-
training_files = [
|
|
103
|
-
os.path.join(args.train, file)
|
|
104
|
-
for file in os.listdir(args.train) if file.endswith(".csv")
|
|
105
|
-
]
|
|
102
|
+
training_files = [os.path.join(args.train, file) for file in os.listdir(args.train) if file.endswith(".csv")]
|
|
106
103
|
all_df = pd.concat([pd.read_csv(file, engine="python") for file in training_files])
|
|
107
104
|
|
|
108
105
|
# Check if the DataFrame is empty
|
|
@@ -116,10 +113,7 @@ if __name__ == "__main__":
|
|
|
116
113
|
|
|
117
114
|
if needs_standardization:
|
|
118
115
|
# Create a pipeline with standardization and the model
|
|
119
|
-
model = Pipeline([
|
|
120
|
-
("scaler", StandardScaler()),
|
|
121
|
-
("model", model)
|
|
122
|
-
])
|
|
116
|
+
model = Pipeline([("scaler", StandardScaler()), ("model", model)])
|
|
123
117
|
|
|
124
118
|
# Handle logic based on the model_type
|
|
125
119
|
if model_type in ["classifier", "regressor"]:
|
|
@@ -206,6 +200,7 @@ if __name__ == "__main__":
|
|
|
206
200
|
with open(os.path.join(args.model_dir, "feature_columns.json"), "w") as fp:
|
|
207
201
|
json.dump(feature_list, fp)
|
|
208
202
|
|
|
203
|
+
|
|
209
204
|
#
|
|
210
205
|
# Inference Section
|
|
211
206
|
#
|
|
@@ -70,6 +70,11 @@ def fill_template(template_path: str, params: dict, output_script: str) -> str:
|
|
|
70
70
|
# Sanity check to ensure all placeholders were replaced
|
|
71
71
|
if "{{" in template and "}}" in template:
|
|
72
72
|
msg = "Not all template placeholders were replaced. Please check your params."
|
|
73
|
+
|
|
74
|
+
# Show which placeholders are still present
|
|
75
|
+
start = template.index("{{")
|
|
76
|
+
end = template.index("}}", start) + 2
|
|
77
|
+
msg += f" Unreplaced placeholder: {template[start:end]}"
|
|
73
78
|
log.critical(msg)
|
|
74
79
|
raise ValueError(msg)
|
|
75
80
|
|
|
@@ -88,6 +93,7 @@ def generate_model_script(template_params: dict) -> str:
|
|
|
88
93
|
template_params (dict): Dictionary containing the parameters:
|
|
89
94
|
- model_imports (str): Import string for the model class
|
|
90
95
|
- model_type (ModelType): The enumerated type of model to generate
|
|
96
|
+
- model_framework (str): The enumerated model framework to use
|
|
91
97
|
- model_class (str): The model class to use (e.g., "RandomForestRegressor")
|
|
92
98
|
- target_column (str): Column name of the target variable
|
|
93
99
|
- feature_list (list[str]): A list of columns for the features
|
|
@@ -98,22 +104,24 @@ def generate_model_script(template_params: dict) -> str:
|
|
|
98
104
|
Returns:
|
|
99
105
|
str: The name of the generated model script
|
|
100
106
|
"""
|
|
101
|
-
from workbench.api import ModelType # Avoid circular import
|
|
107
|
+
from workbench.api import ModelType, ModelFramework # Avoid circular import
|
|
102
108
|
|
|
103
109
|
# Determine which template to use based on model type
|
|
104
110
|
if template_params.get("model_class"):
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
+
template_name = "scikit_learn.template"
|
|
112
|
+
model_script_dir = "scikit_learn"
|
|
113
|
+
elif template_params["model_framework"] == ModelFramework.PYTORCH_TABULAR:
|
|
114
|
+
template_name = "pytorch.template"
|
|
115
|
+
model_script_dir = "pytorch_model"
|
|
116
|
+
elif template_params["model_framework"] == ModelFramework.CHEMPROP:
|
|
117
|
+
template_name = "chemprop.template"
|
|
118
|
+
model_script_dir = "chemprop"
|
|
111
119
|
elif template_params["model_type"] in [ModelType.REGRESSOR, ModelType.CLASSIFIER]:
|
|
112
120
|
template_name = "xgb_model.template"
|
|
113
121
|
model_script_dir = "xgb_model"
|
|
114
122
|
elif template_params["model_type"] == ModelType.UQ_REGRESSOR:
|
|
115
|
-
template_name = "
|
|
116
|
-
model_script_dir = "
|
|
123
|
+
template_name = "mapie.template"
|
|
124
|
+
model_script_dir = "uq_models"
|
|
117
125
|
elif template_params["model_type"] == ModelType.ENSEMBLE_REGRESSOR:
|
|
118
126
|
template_name = "ensemble_xgb.template"
|
|
119
127
|
model_script_dir = "ensemble_xgb"
|