workbench 0.8.197__py3-none-any.whl → 0.8.201__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- workbench/algorithms/dataframe/proximity.py +19 -12
- workbench/api/__init__.py +2 -1
- workbench/api/feature_set.py +7 -4
- workbench/api/model.py +1 -1
- workbench/core/artifacts/__init__.py +11 -2
- workbench/core/artifacts/endpoint_core.py +84 -46
- workbench/core/artifacts/feature_set_core.py +69 -1
- workbench/core/artifacts/model_core.py +37 -7
- workbench/core/cloud_platform/aws/aws_parameter_store.py +18 -2
- workbench/core/transforms/features_to_model/features_to_model.py +23 -20
- workbench/core/views/view.py +2 -2
- workbench/model_scripts/chemprop/chemprop.template +931 -0
- workbench/model_scripts/chemprop/generated_model_script.py +931 -0
- workbench/model_scripts/chemprop/requirements.txt +11 -0
- workbench/model_scripts/custom_models/chem_info/fingerprints.py +134 -0
- workbench/model_scripts/custom_models/chem_info/morgan_fingerprints.py +1 -1
- workbench/model_scripts/custom_models/proximity/proximity.py +19 -12
- workbench/model_scripts/custom_models/uq_models/proximity.py +19 -12
- workbench/model_scripts/pytorch_model/generated_model_script.py +130 -88
- workbench/model_scripts/pytorch_model/pytorch.template +128 -86
- workbench/model_scripts/scikit_learn/generated_model_script.py +302 -0
- workbench/model_scripts/script_generation.py +10 -7
- workbench/model_scripts/uq_models/generated_model_script.py +25 -18
- workbench/model_scripts/uq_models/mapie.template +23 -16
- workbench/model_scripts/xgb_model/generated_model_script.py +6 -6
- workbench/model_scripts/xgb_model/xgb_model.template +2 -2
- workbench/repl/workbench_shell.py +14 -5
- workbench/scripts/endpoint_test.py +162 -0
- workbench/scripts/{lambda_launcher.py → lambda_test.py} +10 -0
- workbench/utils/chemprop_utils.py +724 -0
- workbench/utils/pytorch_utils.py +497 -0
- workbench/utils/xgboost_model_utils.py +12 -5
- {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/METADATA +2 -2
- {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/RECORD +38 -30
- {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/entry_points.txt +2 -1
- {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/WHEEL +0 -0
- {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/licenses/LICENSE +0 -0
- {workbench-0.8.197.dist-info → workbench-0.8.201.dist-info}/top_level.txt +0 -0
workbench/core/transforms/features_to_model/features_to_model.py
CHANGED

@@ -9,7 +9,7 @@ import time
 # Local Imports
 from workbench.core.transforms.transform import Transform, TransformInput, TransformOutput
 from workbench.core.artifacts.feature_set_core import FeatureSetCore
-from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelImages
+from workbench.core.artifacts.model_core import ModelCore, ModelType, ModelFramework, ModelImages
 from workbench.core.artifacts.artifact import Artifact
 from workbench.model_scripts.script_generation import generate_model_script, fill_template
 from workbench.utils.model_utils import supported_instance_types
@@ -33,6 +33,7 @@ class FeaturesToModel(Transform):
         feature_name: str,
         model_name: str,
         model_type: ModelType,
+        model_framework=ModelFramework.XGBOOST,
         model_class=None,
         model_import_str=None,
         custom_script=None,
@@ -46,6 +47,7 @@ class FeaturesToModel(Transform):
             feature_name (str): Name of the FeatureSet to use as input
             model_name (str): Name of the Model to create as output
             model_type (ModelType): ModelType.REGRESSOR or ModelType.CLASSIFIER, etc.
+            model_framework (ModelFramework, optional): The model framework (default ModelFramework.XGBOOST)
             model_class (str, optional): The scikit model (e.g. KNeighborsRegressor) (default None)
             model_import_str (str, optional): The import string for the model (default None)
             custom_script (str, optional): Custom script to use for the model (default None)
@@ -65,6 +67,7 @@ class FeaturesToModel(Transform):
         self.input_type = TransformInput.FEATURE_SET
         self.output_type = TransformOutput.MODEL
         self.model_type = model_type
+        self.model_framework = model_framework
         self.model_class = model_class
         self.model_import_str = model_import_str
         self.custom_script = str(custom_script) if custom_script else None
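For reference, the new keyword sits after `model_type` and defaults to XGBoost, so existing callers are unaffected. A minimal sketch of opting into another framework; the FeatureSet and Model names here are placeholders, not values from this release:

```python
# Minimal usage sketch of the new model_framework keyword (placeholder names).
from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel
from workbench.core.artifacts.model_core import ModelType, ModelFramework

# Default behavior is unchanged: model_framework falls back to ModelFramework.XGBOOST
xgb_to_model = FeaturesToModel("my_features", "my-xgb-model", model_type=ModelType.REGRESSOR)

# Opting into one of the deep-learning frameworks added in this release
chemprop_to_model = FeaturesToModel(
    "my_features",
    "my-chemprop-model",
    model_type=ModelType.REGRESSOR,
    model_framework=ModelFramework.CHEMPROP,
)
```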
@@ -157,6 +160,7 @@
         template_params = {
             "model_imports": self.model_import_str,
             "model_type": self.model_type,
+            "model_framework": self.model_framework,
             "model_class": self.model_class,
             "target_column": self.target_column,
             "feature_list": self.model_feature_list,
@@ -234,12 +238,20 @@ class FeaturesToModel(Transform):
 
         # Create a Sagemaker Model with our script
         image = ModelImages.get_image_uri(self.sm_session.boto_region_name, self.training_image)
+
+        # Use GPU instance for ChemProp/PyTorch, CPU for others
+        if self.model_framework in [ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR]:
+            train_instance_type = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr
+            self.log.important(f"Using GPU instance {train_instance_type} for {self.model_framework.value}")
+        else:
+            train_instance_type = "ml.m5.xlarge"
+
         self.estimator = Estimator(
             entry_point=entry_point,
             source_dir=source_dir,
             role=self.workbench_role_arn,
             instance_count=1,
-            instance_type=
+            instance_type=train_instance_type,
             sagemaker_session=self.sm_session,
             image_uri=image,
             metric_definitions=metric_definitions,
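The instance selection above is a hardcoded branch; the same decision reads naturally as a small framework-to-instance lookup. The sketch below is not the package's code, just a restatement of the logic in the hunk, and it assumes only the enum members the hunk itself references:

```python
# Restatement of the instance-type decision above; not the package's implementation.
from workbench.core.artifacts.model_core import ModelFramework

GPU_INSTANCE = "ml.g6.xlarge"  # NVIDIA L4 GPU, ~$0.80/hr (per the comment above)
CPU_INSTANCE = "ml.m5.xlarge"  # default CPU training instance


def train_instance_for(framework: ModelFramework) -> str:
    """GPU instance for ChemProp/PyTorch Tabular training jobs, CPU instance otherwise."""
    gpu_frameworks = {ModelFramework.CHEMPROP, ModelFramework.PYTORCH_TABULAR}
    return GPU_INSTANCE if framework in gpu_frameworks else CPU_INSTANCE


print(train_instance_for(ModelFramework.XGBOOST))   # -> ml.m5.xlarge
print(train_instance_for(ModelFramework.CHEMPROP))  # -> ml.g6.xlarge
```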
@@ -274,8 +286,10 @@ class FeaturesToModel(Transform):
         self.log.info("Post-Transform: Calling onboard() on the Model...")
         time.sleep(3)  # Give AWS time to complete Model register
 
-        # Store the model
-        output_model = ModelCore(self.output_name
+        # Store the model metadata information
+        output_model = ModelCore(self.output_name)
+        output_model._set_model_type(self.model_type)
+        output_model._set_model_framework(self.model_framework)
         output_model.upsert_workbench_meta({"workbench_model_features": self.model_feature_list})
         output_model.upsert_workbench_meta({"workbench_model_target": self.target_column})
 
@@ -330,12 +344,11 @@ if __name__ == "__main__":
 
     # Regression Model
    input_name = "abalone_features"
-    output_name = "
+    output_name = "abalone-regression"
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.REGRESSOR)
     to_model.set_output_tags(["test"])
     to_model.transform(target_column="class_number_of_rings", description="Test Abalone Regression")
 
-    """
     # Classification Model
     input_name = "wine_features"
     output_name = "wine-classification"
@@ -345,10 +358,10 @@ if __name__ == "__main__":
 
     # Quantile Regression Model (Abalone)
     input_name = "abalone_features"
-    output_name = "abalone-
+    output_name = "abalone-regression-uq"
     to_model = FeaturesToModel(input_name, output_name, ModelType.UQ_REGRESSOR)
-    to_model.set_output_tags(["abalone", "
-    to_model.transform(target_column="class_number_of_rings", description="Abalone
+    to_model.set_output_tags(["abalone", "uq"])
+    to_model.transform(target_column="class_number_of_rings", description="Abalone UQ Regression")
 
     # Scikit-Learn Kmeans Clustering Model
     input_name = "wine_features"
@@ -402,7 +415,7 @@ if __name__ == "__main__":
     scripts_root = Path(__file__).resolve().parents[3] / "model_scripts"
     my_script = scripts_root / "custom_models" / "chem_info" / "molecular_descriptors.py"
     input_name = "aqsol_features"
-    output_name = "smiles-to-taut-md-stereo
+    output_name = "test-smiles-to-taut-md-stereo"
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
     to_model.set_output_tags(["smiles", "molecular descriptors"])
     to_model.transform(target_column=None, feature_list=["smiles"], description="Smiles to Molecular Descriptors")
@@ -415,13 +428,3 @@ if __name__ == "__main__":
     to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
     to_model.set_output_tags(["smiles", "morgan fingerprints"])
     to_model.transform(target_column=None, feature_list=["smiles"], description="Smiles to Morgan Fingerprints")
-
-    # Tautomerization Model
-    scripts_root = Path(__file__).resolve().parents[3] / "model_scripts"
-    my_script = scripts_root / "custom_models" / "chem_info" / "tautomerize.py"
-    input_name = "aqsol_features"
-    output_name = "tautomerize-v0"
-    to_model = FeaturesToModel(input_name, output_name, model_type=ModelType.TRANSFORMER, custom_script=my_script)
-    to_model.set_output_tags(["smiles", "tautomerization"])
-    to_model.transform(target_column=None, feature_list=["smiles"], description="Tautomerize Smiles")
-    """
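The refreshed `__main__` examples above still exercise the default XGBoost path. A hedged sketch of an equivalent smoke test for one of the new frameworks, following the same call pattern; the Model name and tags are placeholders, and only the calls shown in the hunks above are assumed to exist:

```python
# Illustrative smoke test in the style of the __main__ block above; placeholder names.
from workbench.core.transforms.features_to_model.features_to_model import FeaturesToModel
from workbench.core.artifacts.model_core import ModelType, ModelFramework

input_name = "abalone_features"
output_name = "abalone-regression-pytorch"  # placeholder Model name
to_model = FeaturesToModel(
    input_name,
    output_name,
    model_type=ModelType.REGRESSOR,
    model_framework=ModelFramework.PYTORCH_TABULAR,  # trains on ml.g6.xlarge per the earlier hunk
)
to_model.set_output_tags(["abalone", "pytorch", "test"])
to_model.transform(target_column="class_number_of_rings", description="Abalone PyTorch Tabular Regression")
```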
workbench/core/views/view.py
CHANGED
@@ -91,11 +91,11 @@ class View:
             self.table, self.data_source.database, self.data_source.boto3_session
         )
 
-    def pull_dataframe(self, limit: int =
+    def pull_dataframe(self, limit: int = 100000) -> Union[pd.DataFrame, None]:
         """Pull a DataFrame based on the view type
 
         Args:
-            limit (int): The maximum number of rows to pull (default:
+            limit (int): The maximum number of rows to pull (default: 100000)
 
         Returns:
             Union[pd.DataFrame, None]: The DataFrame for the view or None if it doesn't exist